From d61a4eff092ccdbdc8dacf8e0ca4c767d803ad16 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 5 Dec 2018 17:23:18 +0800 Subject: [PATCH] plan: derive `col is not null` from `col op col` conditions Check whether the `op` condition is null rejected, if yes, derive `col is not null` for both sides of inner join, and for inner side of outer join. Also, remove duplicate conditions before pushing them down. Check if the condition is mutable before removing duplicates. For AntiSemiJoin, AntiLeftOuterSemiJoin and LeftOuterSemiJoin, do not generate new `is not null` conditions. --- cmd/explaintest/r/explain_complex.result | 14 +- .../r/explain_complex_stats.result | 14 +- cmd/explaintest/r/explain_easy.result | 93 ++++++------ cmd/explaintest/r/explain_easy_stats.result | 28 ++-- cmd/explaintest/r/select.result | 20 +-- cmd/explaintest/r/topn_push_down.result | 29 ++-- executor/index_lookup_join_test.go | 61 ++++---- executor/join_test.go | 48 ++----- expression/builtin.go | 4 +- expression/constant_propagation.go | 29 +++- expression/constant_propagation_test.go | 18 +-- expression/function_traits.go | 41 ++++++ expression/util.go | 44 ++++++ planner/core/cbo_test.go | 44 +++--- planner/core/expression_rewriter.go | 10 +- planner/core/logical_plan_builder.go | 44 ++++-- planner/core/logical_plan_test.go | 134 ++++++++++++++++++ planner/core/logical_plans.go | 2 +- planner/core/rule_predicate_push_down.go | 50 +++++-- 19 files changed, 506 insertions(+), 221 deletions(-) diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index 2b43ae3a2357a..3dd044a6a89be 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -121,11 +121,11 @@ Projection_13 1.00 root gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test.d └─IndexJoin_24 0.00 root inner join, inner:IndexLookUp_23, outer key:gad.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, gad.ip), gt(test.dd.t, gad.t) 
├─IndexLookUp_23 0.00 root │ ├─IndexScan_20 10.00 cop table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false, stats:pseudo - │ └─Selection_22 0.00 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908) + │ └─Selection_22 0.00 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) │ └─TableScan_21 10.00 cop table:dd, keep order:false, stats:pseudo └─IndexLookUp_33 3.33 root ├─IndexScan_30 3333.33 cop table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo - └─Selection_32 3.33 cop eq(gad.pt, "android") + └─Selection_32 3.33 cop eq(gad.pt, "android"), not(isnull(gad.ip)) └─TableScan_31 3333.33 cop table:st, keep order:false, stats:pseudo explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; id count task operator info @@ -134,11 +134,11 @@ Projection_10 0.00 root gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk.t, └─IndexJoin_18 0.00 root inner join, inner:IndexLookUp_17, outer key:gad.aid, inner key:sdk.aid, other cond:eq(gad.dic, sdk.mac), lt(gad.t, sdk.t) ├─IndexLookUp_27 0.00 root │ ├─IndexScan_24 3333.33 cop table:gad, index:t, range:(1477971479,+inf], keep order:false, stats:pseudo - │ └─Selection_26 0.00 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios") + │ └─Selection_26 0.00 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), not(isnull(gad.dic)) │ └─TableScan_25 3333.33 cop table:st, keep order:false, stats:pseudo └─IndexLookUp_17 0.00 root ├─IndexScan_14 10.00 cop table:sdk, index:aid, dic, range: 
decided by [gad.aid gad.dic], keep order:false, stats:pseudo - └─Selection_16 0.00 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479) + └─Selection_16 0.00 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t)) └─TableScan_15 10.00 cop table:dd, keep order:false, stats:pseudo explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; id count task operator info @@ -153,9 +153,9 @@ id count task operator info Projection_10 0.00 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_13 0.00 root offset:0, count:2000 └─IndexJoin_19 0.00 root inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_43 0.00 root data:Selection_42 - │ └─Selection_42 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) - │ └─TableScan_41 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo + ├─TableReader_45 0.00 root data:Selection_44 + │ └─Selection_44 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) + │ └─TableScan_43 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo └─IndexLookUp_18 3.33 root ├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo └─Selection_17 3.33 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 1569a2d3a850c..13935717900f5 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -130,11 +130,11 @@ Projection_13 424.00 root gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test └─HashAgg_19 424.00 root group 
by:gad.aid, test.dd.dic, funcs:firstrow(gad.id), firstrow(gad.aid), firstrow(gad.cm), firstrow(gad.p1), firstrow(gad.p2), firstrow(gad.p3), firstrow(gad.p4), firstrow(gad.p5), firstrow(gad.p6_md5), firstrow(gad.p7_md5), firstrow(gad.ext), firstrow(gad.t), firstrow(test.dd.id), firstrow(test.dd.dic), firstrow(test.dd.ip), firstrow(test.dd.t) └─IndexJoin_24 424.00 root inner join, inner:IndexLookUp_23, outer key:gad.aid, inner key:test.dd.aid, other cond:eq(gad.ip, test.dd.ip), gt(test.dd.t, gad.t) ├─TableReader_29 424.00 root data:Selection_28 - │ └─Selection_28 424.00 cop eq(gad.bm, 0), eq(gad.pt, "android"), gt(gad.t, 1478143908) + │ └─Selection_28 424.00 cop eq(gad.bm, 0), eq(gad.pt, "android"), gt(gad.t, 1478143908), not(isnull(gad.ip)) │ └─TableScan_27 1999.00 cop table:gad, range:[0,+inf], keep order:false └─IndexLookUp_23 455.80 root ├─IndexScan_20 1.00 cop table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false - └─Selection_22 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908) + └─Selection_22 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) └─TableScan_21 1.00 cop table:dd, keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; id count task operator info @@ -142,11 +142,11 @@ Projection_10 170.34 root gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk. 
└─Limit_13 170.34 root offset:0, count:3000 └─IndexJoin_18 170.34 root inner join, inner:IndexLookUp_17, outer key:gad.aid, inner key:sdk.aid, other cond:eq(gad.dic, sdk.mac), lt(gad.t, sdk.t) ├─TableReader_23 170.34 root data:Selection_22 - │ └─Selection_22 170.34 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), gt(gad.t, 1477971479) + │ └─Selection_22 170.34 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), gt(gad.t, 1477971479), not(isnull(gad.dic)) │ └─TableScan_21 1999.00 cop table:gad, range:[0,+inf], keep order:false └─IndexLookUp_17 509.04 root ├─IndexScan_14 1.00 cop table:sdk, index:aid, dic, range: decided by [gad.aid gad.dic], keep order:false - └─Selection_16 509.04 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479) + └─Selection_16 509.04 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t)) └─TableScan_15 1.00 cop table:dd, keep order:false explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; id count task operator info @@ -161,9 +161,9 @@ id count task operator info Projection_10 428.32 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_13 428.32 root offset:0, count:2000 └─IndexJoin_19 428.32 root inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_43 428.32 root data:Selection_42 - │ └─Selection_42 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) - │ └─TableScan_41 2000.00 cop table:dt, range:[0,+inf], keep order:false + ├─TableReader_45 428.32 root data:Selection_44 + │ └─Selection_44 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) + │ └─TableScan_43 2000.00 cop table:dt, range:[0,+inf], keep order:false 
└─IndexLookUp_18 970.00 root ├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false └─Selection_17 970.00 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index dca41dcc8e468..058b67576d477 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -42,12 +42,13 @@ IndexReader_9 10.00 root index:IndexScan_8 └─IndexScan_8 10.00 cop table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info -IndexJoin_11 4166.67 root left outer join, inner:IndexLookUp_10, outer key:test.t1.c2, inner key:test.t2.c1 -├─TableReader_23 3333.33 root data:TableScan_22 -│ └─TableScan_22 3333.33 cop table:t1, range:(1,+inf], keep order:false, stats:pseudo -└─IndexLookUp_10 10.00 root - ├─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [test.t1.c2], keep order:false, stats:pseudo - └─TableScan_9 10.00 cop table:t2, keep order:false, stats:pseudo +IndexJoin_12 4166.67 root left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1 +├─TableReader_24 3333.33 root data:TableScan_23 +│ └─TableScan_23 3333.33 cop table:t1, range:(1,+inf], keep order:false, stats:pseudo +└─IndexLookUp_11 0.00 root + ├─Selection_10 0.00 cop not(isnull(test.t2.c1)) + │ └─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [test.t1.c2], keep order:false, stats:pseudo + └─TableScan_9 0.00 cop table:t2, keep order:false explain update t1 set t1.c2 = 2 where t1.c1 = 1; id count task operator info Point_Get_1 1.00 root table:t1, handle:1 @@ -58,12 +59,13 @@ IndexLookUp_9 10.00 root └─TableScan_8 10.00 cop table:t1, keep order:false, stats:pseudo explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id count task operator info -Projection_11 10000.00 root cast(join_agg_0) -└─IndexJoin_14 
10000.00 root inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1 - ├─HashAgg_21 8000.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) - │ └─TableReader_22 8000.00 root data:HashAgg_17 - │ └─HashAgg_17 8000.00 cop group by:b.c2, funcs:count(b.c2), firstrow(b.c2) - │ └─TableScan_20 10000.00 cop table:b, range:[-inf,+inf], keep order:false, stats:pseudo +Projection_11 9990.00 root cast(join_agg_0) +└─IndexJoin_14 9990.00 root inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1 + ├─HashAgg_22 7992.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) + │ └─TableReader_23 7992.00 root data:HashAgg_17 + │ └─HashAgg_17 7992.00 cop group by:b.c2, funcs:count(b.c2), firstrow(b.c2) + │ └─Selection_21 9990.00 cop not(isnull(b.c2)) + │ └─TableScan_20 10000.00 cop table:b, range:[-inf,+inf], keep order:false, stats:pseudo └─TableReader_13 10.00 root data:TableScan_12 └─TableScan_12 10.00 cop table:a, range: decided by [b.c2], keep order:false, stats:pseudo explain select * from t2 order by t2.c2 limit 0, 1; @@ -94,12 +96,13 @@ StreamAgg_12 1.00 root funcs:sum(5_aux_0) └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info -Projection_9 10000.00 root test.t1.c1 -└─IndexJoin_12 10000.00 root inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1 - ├─HashAgg_19 8000.00 root group by:col_1, funcs:firstrow(col_0) - │ └─TableReader_20 8000.00 root data:HashAgg_15 - │ └─HashAgg_15 8000.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) - │ └─TableScan_18 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo +Projection_9 9990.00 root test.t1.c1 +└─IndexJoin_12 9990.00 root inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1 + ├─HashAgg_20 7992.00 root group by:col_1, funcs:firstrow(col_0) + │ └─TableReader_21 7992.00 root data:HashAgg_15 + │ 
└─HashAgg_15 7992.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) + │ └─Selection_19 9990.00 cop not(isnull(test.t2.c2)) + │ └─TableScan_18 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo └─TableReader_11 10.00 root data:TableScan_10 └─TableScan_10 10.00 cop table:t1, range: decided by [test.t2.c2], keep order:false, stats:pseudo explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1; @@ -305,12 +308,13 @@ Projection_11 10000.00 root 9_aux_0 ├─TableReader_15 10000.00 root data:TableScan_14 │ └─TableScan_14 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─StreamAgg_20 1.00 root funcs:count(1) - └─IndexJoin_33 12.50 root inner join, inner:TableReader_32, outer key:s.c, inner key:t1.a - ├─IndexLookUp_38 10.00 root - │ ├─IndexScan_36 10.00 cop table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo - │ └─TableScan_37 10.00 cop table:t, keep order:false, stats:pseudo - └─TableReader_32 10.00 root data:TableScan_31 - └─TableScan_31 10.00 cop table:t1, range: decided by [s.c], keep order:false, stats:pseudo + └─IndexJoin_34 12.49 root inner join, inner:TableReader_33, outer key:s.c, inner key:t1.a + ├─IndexLookUp_40 9.99 root + │ ├─IndexScan_37 10.00 cop table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo + │ └─Selection_39 9.99 cop not(isnull(s.c)) + │ └─TableScan_38 10.00 cop table:t, keep order:false, stats:pseudo + └─TableReader_33 10.00 root data:TableScan_32 + └─TableScan_32 10.00 cop table:t1, range: decided by [s.c], keep order:false, stats:pseudo drop table if exists t; create table t(a int unsigned); explain select t.a = '123455' from t; @@ -356,13 +360,14 @@ id count task operator info TableDual_5 0.00 root rows:0 explain select * from t t1 join t t2 where t1.b = t2.b and t2.b is null; id count task operator info -Projection_7 12.50 root t1.a, t1.b, t2.a, t2.b -└─HashRightJoin_9 12.50 root inner join, 
inner:TableReader_12, equal:[eq(t2.b, t1.b)] - ├─TableReader_12 10.00 root data:Selection_11 - │ └─Selection_11 10.00 cop isnull(t2.b) +Projection_7 0.00 root t1.a, t1.b, t2.a, t2.b +└─HashRightJoin_9 0.00 root inner join, inner:TableReader_12, equal:[eq(t2.b, t1.b)] + ├─TableReader_12 0.00 root data:Selection_11 + │ └─Selection_11 0.00 cop isnull(t2.b), not(isnull(t2.b)) │ └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo - └─TableReader_14 10000.00 root data:TableScan_13 - └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo + └─TableReader_15 9990.00 root data:Selection_14 + └─Selection_14 9990.00 cop not(isnull(t1.b)) + └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b); id count task operator info HashLeftJoin_9 8000.00 root anti semi join, inner:TableReader_13, equal:[eq(t1.b, t2.b)] @@ -480,14 +485,14 @@ Projection_12 10000.00 root 9_aux_0 ├─TableReader_16 10000.00 root data:TableScan_15 │ └─TableScan_15 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─HashAgg_19 1.00 root funcs:count(join_agg_0) - └─HashRightJoin_21 10.00 root inner join, inner:HashAgg_27, equal:[eq(t1.a, s.a)] - ├─HashAgg_27 8.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) - │ └─TableReader_28 8.00 root data:HashAgg_22 - │ └─HashAgg_22 8.00 cop group by:t1.a, funcs:count(1), firstrow(t1.a) - │ └─Selection_26 10.00 cop eq(t1.a, test.t.a) + └─HashRightJoin_21 9.99 root inner join, inner:HashAgg_27, equal:[eq(t1.a, s.a)] + ├─HashAgg_27 7.99 root group by:col_2, funcs:count(col_0), firstrow(col_1) + │ └─TableReader_28 7.99 root data:HashAgg_22 + │ └─HashAgg_22 7.99 cop group by:t1.a, funcs:count(1), firstrow(t1.a) + │ └─Selection_26 9.99 cop eq(t1.a, test.t.a), not(isnull(t1.a)) │ └─TableScan_25 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo 
- └─TableReader_34 10.00 root data:Selection_33 - └─Selection_33 10.00 cop eq(s.a, test.t.a) + └─TableReader_34 9.99 root data:Selection_33 + └─Selection_33 9.99 cop eq(s.a, test.t.a), not(isnull(s.a)) └─TableScan_32 10000.00 cop table:s, range:[-inf,+inf], keep order:false, stats:pseudo explain select * from t ta left outer join t tb on ta.nb = tb.nb and ta.a > 1 where ifnull(tb.a, 1) or tb.a is null; id count task operator info @@ -514,14 +519,14 @@ Projection_14 10000.00 root 9_aux_0 │ └─TableReader_19 10000.00 root data:TableScan_18 │ └─TableScan_18 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─HashAgg_23 1.00 root funcs:count(join_agg_0) - └─HashRightJoin_25 10.00 root inner join, inner:HashAgg_31, equal:[eq(t1.a, s.a)] - ├─HashAgg_31 8.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) - │ └─TableReader_32 8.00 root data:HashAgg_26 - │ └─HashAgg_26 8.00 cop group by:t1.a, funcs:count(1), firstrow(t1.a) - │ └─Selection_30 10.00 cop eq(t1.a, test.t.a) + └─HashRightJoin_25 9.99 root inner join, inner:HashAgg_31, equal:[eq(t1.a, s.a)] + ├─HashAgg_31 7.99 root group by:col_2, funcs:count(col_0), firstrow(col_1) + │ └─TableReader_32 7.99 root data:HashAgg_26 + │ └─HashAgg_26 7.99 cop group by:t1.a, funcs:count(1), firstrow(t1.a) + │ └─Selection_30 9.99 cop eq(t1.a, test.t.a), not(isnull(t1.a)) │ └─TableScan_29 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - └─TableReader_38 10.00 root data:Selection_37 - └─Selection_37 10.00 cop eq(s.a, test.t.a) + └─TableReader_38 9.99 root data:Selection_37 + └─Selection_37 9.99 cop eq(s.a, test.t.a), not(isnull(s.a)) └─TableScan_36 10000.00 cop table:s, range:[-inf,+inf], keep order:false, stats:pseudo drop table if exists t; create table t(a int); diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 12cdb96b29f03..ef9d2082e1252 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ 
b/cmd/explaintest/r/explain_easy_stats.result @@ -47,13 +47,13 @@ explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info Projection_6 2481.25 root test.t1.c1, test.t1.c2, test.t1.c3, test.t2.c1, test.t2.c2 └─MergeJoin_7 2481.25 root left outer join, left key:test.t1.c2, right key:test.t2.c1 - ├─IndexLookUp_17 1998.00 root - │ ├─Selection_16 1998.00 cop gt(test.t1.c1, 1) - │ │ └─IndexScan_14 1999.00 cop table:t1, index:c2, range:[NULL,+inf], keep order:true - │ └─TableScan_15 1998.00 cop table:t1, keep order:false - └─IndexLookUp_21 1985.00 root - ├─IndexScan_19 1985.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true - └─TableScan_20 1985.00 cop table:t2, keep order:false + ├─IndexLookUp_18 1998.00 root + │ ├─Selection_17 1998.00 cop gt(test.t1.c1, 1) + │ │ └─IndexScan_15 1999.00 cop table:t1, index:c2, range:[NULL,+inf], keep order:true + │ └─TableScan_16 1998.00 cop table:t1, keep order:false + └─IndexLookUp_22 1985.00 root + ├─IndexScan_20 1985.00 cop table:t2, index:c1, range:[-inf,+inf], keep order:true + └─TableScan_21 1985.00 cop table:t2, keep order:false explain update t1 set t1.c2 = 2 where t1.c1 = 1; id count task operator info Point_Get_1 1.00 root table:t1, handle:1 @@ -66,10 +66,11 @@ explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id count task operator info Projection_11 1985.00 root cast(join_agg_0) └─IndexJoin_14 1985.00 root inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1 - ├─HashAgg_21 1985.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) - │ └─TableReader_22 1985.00 root data:HashAgg_17 + ├─HashAgg_22 1985.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) + │ └─TableReader_23 1985.00 root data:HashAgg_17 │ └─HashAgg_17 1985.00 cop group by:b.c2, funcs:count(b.c2), firstrow(b.c2) - │ └─TableScan_20 1985.00 cop table:b, range:[-inf,+inf], keep order:false + │ └─Selection_21 1985.00 cop not(isnull(b.c2)) + │ └─TableScan_20 
1985.00 cop table:b, range:[-inf,+inf], keep order:false └─TableReader_13 1.00 root data:TableScan_12 └─TableScan_12 1.00 cop table:a, range: decided by [b.c2], keep order:false explain select * from t2 order by t2.c2 limit 0, 1; @@ -94,10 +95,11 @@ explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info Projection_9 1985.00 root test.t1.c1 └─IndexJoin_12 1985.00 root inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1 - ├─HashAgg_19 1985.00 root group by:col_1, funcs:firstrow(col_0) - │ └─TableReader_20 1985.00 root data:HashAgg_15 + ├─HashAgg_20 1985.00 root group by:col_1, funcs:firstrow(col_0) + │ └─TableReader_21 1985.00 root data:HashAgg_15 │ └─HashAgg_15 1985.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) - │ └─TableScan_18 1985.00 cop table:t2, range:[-inf,+inf], keep order:false + │ └─Selection_19 1985.00 cop not(isnull(test.t2.c2)) + │ └─TableScan_18 1985.00 cop table:t2, range:[-inf,+inf], keep order:false └─TableReader_11 1.00 root data:TableScan_10 └─TableScan_10 1.00 cop table:t1, range: decided by [test.t2.c2], keep order:false explain select * from information_schema.columns; diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 8571aa46d1455..beaa3d8e1ba42 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -309,17 +309,19 @@ drop table if exists t; create table t (id int primary key, a int, b int); explain select * from (t t1 left join t t2 on t1.a = t2.a) left join (t t3 left join t t4 on t3.a = t4.a) on t2.b = 1; id count task operator info -HashLeftJoin_10 156250000.00 root left outer join, inner:HashLeftJoin_16, left cond:[eq(t2.b, 1)] -├─HashLeftJoin_11 12500.00 root left outer join, inner:TableReader_15, equal:[eq(t1.a, t2.a)] +HashLeftJoin_10 155937656.25 root left outer join, inner:HashLeftJoin_17, left cond:[eq(t2.b, 1)] +├─HashLeftJoin_11 12487.50 root left outer join, inner:TableReader_16, equal:[eq(t1.a, 
t2.a)] │ ├─TableReader_13 10000.00 root data:TableScan_12 │ │ └─TableScan_12 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo -│ └─TableReader_15 10000.00 root data:TableScan_14 -│ └─TableScan_14 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo -└─HashLeftJoin_16 12500.00 root left outer join, inner:TableReader_20, equal:[eq(t3.a, t4.a)] - ├─TableReader_18 10000.00 root data:TableScan_17 - │ └─TableScan_17 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo - └─TableReader_20 10000.00 root data:TableScan_19 - └─TableScan_19 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo +│ └─TableReader_16 9990.00 root data:Selection_15 +│ └─Selection_15 9990.00 cop not(isnull(t2.a)) +│ └─TableScan_14 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo +└─HashLeftJoin_17 12487.50 root left outer join, inner:TableReader_22, equal:[eq(t3.a, t4.a)] + ├─TableReader_19 10000.00 root data:TableScan_18 + │ └─TableScan_18 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo + └─TableReader_22 9990.00 root data:Selection_21 + └─Selection_21 9990.00 cop not(isnull(t4.a)) + └─TableScan_20 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo drop table if exists t; create table t(a bigint primary key, b bigint); desc select * from t where a = 1; diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result index 4651bb89ac6b2..9941e82c75128 100644 --- a/cmd/explaintest/r/topn_push_down.result +++ b/cmd/explaintest/r/topn_push_down.result @@ -169,17 +169,18 @@ LIMIT 0, 5; id count task operator info Projection_13 0.00 root te.expect_time └─Limit_19 0.00 root offset:0, count:5 - └─IndexJoin_137 0.00 root left outer join, inner:IndexReader_136, outer key:tr.id, inner key:p.relate_id - ├─TopN_140 0.00 root te.expect_time:asc, offset:0, count:5 - │ └─IndexJoin_35 0.00 root inner join, inner:IndexLookUp_34, outer 
key:tr.id, inner key:te.trade_id - │ ├─IndexLookUp_105 0.00 root - │ │ ├─Selection_103 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) - │ │ │ └─IndexScan_101 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo - │ │ └─Selection_104 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) - │ │ └─TableScan_102 0.00 cop table:tr, keep order:false - │ └─IndexLookUp_34 250.00 root - │ ├─IndexScan_31 10.00 cop table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo - │ └─Selection_33 250.00 cop ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000) - │ └─TableScan_32 10.00 cop table:te, keep order:false, stats:pseudo - └─IndexReader_136 10.00 root index:IndexScan_135 - └─IndexScan_135 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo + └─IndexJoin_142 0.00 root left outer join, inner:IndexReader_141, outer key:tr.id, inner key:p.relate_id + ├─TopN_145 0.00 root te.expect_time:asc, offset:0, count:5 + │ └─IndexJoin_36 0.00 root inner join, inner:IndexLookUp_35, outer key:tr.id, inner key:te.trade_id + │ ├─IndexLookUp_106 0.00 root + │ │ ├─Selection_104 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) + │ │ │ └─IndexScan_102 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo + │ │ └─Selection_105 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) + │ │ └─TableScan_103 0.00 cop table:tr, keep order:false + │ └─IndexLookUp_35 250.00 root + │ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo + │ └─Selection_34 250.00 cop ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 
23:59:59.000000) + │ └─TableScan_33 10.00 cop table:te, keep order:false, stats:pseudo + └─IndexReader_141 0.00 root index:Selection_140 + └─Selection_140 0.00 cop not(isnull(p.relate_id)) + └─IndexScan_139 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo diff --git a/executor/index_lookup_join_test.go b/executor/index_lookup_join_test.go index adeb966a1f764..9415e8ff9ffc9 100644 --- a/executor/index_lookup_join_test.go +++ b/executor/index_lookup_join_test.go @@ -48,10 +48,11 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { tk.MustExec("insert into t2 values(2,2,2), (3,3,3)") // TableScan below UnionScan tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( - "IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.id", - "├─UnionScan_12 10000.00 root ", - "│ └─TableReader_14 10000.00 root data:TableScan_13", - "│ └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "IndexJoin_11 12487.50 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.id", + "├─UnionScan_12 9990.00 root not(isnull(test.t1.a))", + "│ └─TableReader_15 9990.00 root data:Selection_14", + "│ └─Selection_14 9990.00 cop not(isnull(test.t1.a))", + "│ └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─UnionScan_10 10.00 root ", " └─TableReader_9 10.00 root data:TableScan_8", " └─TableScan_8 10.00 cop table:t2, range: decided by [test.t1.a], keep order:false, stats:pseudo", @@ -61,14 +62,16 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { )) // IndexLookUp below UnionScan tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( - "IndexJoin_12 12500.00 root inner join, inner:UnionScan_11, outer key:test.t1.a, inner key:test.t2.a", - "├─UnionScan_13 10000.00 root ", - "│ └─TableReader_15 10000.00 
root data:TableScan_14", - "│ └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - "└─UnionScan_11 10.00 root ", - " └─IndexLookUp_10 10.00 root ", - " ├─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", - " └─TableScan_9 10.00 cop table:t2, keep order:false, stats:pseudo", + "IndexJoin_13 12487.50 root inner join, inner:UnionScan_12, outer key:test.t1.a, inner key:test.t2.a", + "├─UnionScan_14 9990.00 root not(isnull(test.t1.a))", + "│ └─TableReader_17 9990.00 root data:Selection_16", + "│ └─Selection_16 9990.00 cop not(isnull(test.t1.a))", + "│ └─TableScan_15 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─UnionScan_12 0.00 root not(isnull(test.t2.a))", + " └─IndexLookUp_11 0.00 root ", + " ├─Selection_10 0.00 cop not(isnull(test.t2.a))", + " │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + " └─TableScan_9 0.00 cop table:t2, keep order:false", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( "2 2 2 2 2", @@ -76,14 +79,16 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { )) // IndexScan below UnionScan tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( - "Projection_7 12500.00 root test.t1.a, test.t2.a", - "└─IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.a", - " ├─UnionScan_12 10000.00 root ", - " │ └─TableReader_14 10000.00 root data:TableScan_13", - " │ └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - " └─UnionScan_10 10.00 root ", - " └─IndexReader_9 10.00 root index:IndexScan_8", - " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + "Projection_7 12487.50 root test.t1.a, test.t2.a", + "└─IndexJoin_12 
12487.50 root inner join, inner:UnionScan_11, outer key:test.t1.a, inner key:test.t2.a", + " ├─UnionScan_13 9990.00 root not(isnull(test.t1.a))", + " │ └─TableReader_16 9990.00 root data:Selection_15", + " │ └─Selection_15 9990.00 cop not(isnull(test.t1.a))", + " │ └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + " └─UnionScan_11 0.00 root not(isnull(test.t2.a))", + " └─IndexReader_10 0.00 root index:Selection_9", + " └─Selection_9 0.00 cop not(isnull(test.t2.a))", + " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( "2 2", @@ -104,13 +109,15 @@ func (s *testSuite1) TestBatchIndexJoinUnionScan(c *C) { tk.MustExec("insert into t2 values(1,1)") tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( "StreamAgg_13 1.00 root funcs:count(1)", - "└─IndexJoin_24 12500.00 root inner join, inner:UnionScan_23, outer key:test.t1.a, inner key:test.t2.a", - " ├─UnionScan_25 10000.00 root ", - " │ └─TableReader_27 10000.00 root data:TableScan_26", - " │ └─TableScan_26 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - " └─UnionScan_23 10.00 root ", - " └─IndexReader_22 10.00 root index:IndexScan_21", - " └─IndexScan_21 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + "└─IndexJoin_27 12487.50 root inner join, inner:UnionScan_26, outer key:test.t1.a, inner key:test.t2.a", + " ├─UnionScan_28 9990.00 root not(isnull(test.t1.a))", + " │ └─TableReader_31 9990.00 root data:Selection_30", + " │ └─Selection_30 9990.00 cop not(isnull(test.t1.a))", + " │ └─TableScan_29 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + " └─UnionScan_26 0.00 root not(isnull(test.t2.a))", + " └─IndexReader_25 0.00 root index:Selection_24", + " 
└─Selection_24 0.00 cop not(isnull(test.t2.a))", + " └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( "4", diff --git a/executor/join_test.go b/executor/join_test.go index e51fd86df9ab4..e55a1c62a5d71 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -944,47 +944,17 @@ func (s *testSuite2) TestHashJoin(c *C) { tk.MustQuery("select count(*) from t2").Check(testkit.Rows("0")) tk.MustExec("set @@tidb_max_chunk_size=1;") result := tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where exists (select a from t2 where t1.a = t2.a);") - // id count task operator info execution info - // HashLeftJoin_9 8000.00 root semi join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)] time:1.036712ms, loops:1, rows:0 - // ├─TableReader_11 10000.00 root data:TableScan_10 time:441.096µs, loops:1, rows:1 - // │ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - // └─TableReader_13 10000.00 root data:TableScan_12 time:212.376µs, loops:1, rows:0 - // └─TableScan_12 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo + // HashLeftJoin_9 7992.00 root semi join, inner:TableReader_15, equal:[eq(test.t1.a, test.t2.a)] time:219.863µs, loops:1, rows:0 + // ├─TableReader_12 9990.00 root data:Selection_11 time:9.129µs, loops:1, rows:1 + // │ └─Selection_11 9990.00 cop not(isnull(test.t1.a)) + // │ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo time:0s, loops:0, rows:5 + // └─TableReader_15 9990.00 root data:Selection_14 time:12.983µs, loops:1, rows:0 + // └─Selection_14 9990.00 cop not(isnull(test.t2.a)) + // └─TableScan_13 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo time:0s, loops:0, rows:0 row := result.Rows() - c.Assert(len(row), Equals, 5) + 
c.Assert(len(row), Equals, 7) outerExecInfo := row[1][4].(string) c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "1") - innerExecInfo := row[3][4].(string) + innerExecInfo := row[4][4].(string) c.Assert(innerExecInfo[len(innerExecInfo)-1:], Equals, "0") - - tk.MustExec("insert into t2 select * from t1;") - tk.MustExec("delete from t1;") - tk.MustQuery("select count(*) from t1").Check(testkit.Rows("0")) - tk.MustQuery("select count(*) from t2").Check(testkit.Rows("5")) - result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where not exists (select a from t2 where t1.a = t2.a);") - // id count task operator info execution info - // HashLeftJoin_9 8000.00 root anti semi join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)] time:534.643µs, loops:1, rows:0 - // ├─TableReader_11 10000.00 root data:TableScan_10 time:35.042µs, loops:1, rows:0 - // │ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - // └─TableReader_13 10000.00 root data:TableScan_12 time:0s, loops:0, rows:0 - // └─TableScan_12 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo - row = result.Rows() - c.Assert(len(row), Equals, 5) - outerExecInfo = row[1][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[3][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") - - result = tk.MustQuery("explain select /*+ TIDB_HJ(t1, t2) */ * from t1 left outer join t2 on t1.a = t2.a;") - // id count task operator info execution info - // HashLeftJoin_6 12500.00 root left outer join, inner:TableReader_10, equal:[eq(test.t1.a, test.t2.a)] time:502.553µs, loops:1, rows:0 - // ├─TableReader_8 10000.00 root data:TableScan_7 time:27.302µs, loops:1, rows:0 - // │ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - // └─TableReader_10 10000.00 root data:TableScan_9 time:0s, loops:0, rows:0 - // └─TableScan_9 10000.00 cop 
table:t2, range:[-inf,+inf], keep order:false, stats:pseudo - c.Assert(len(row), Equals, 5) - outerExecInfo = row[1][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[3][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") } diff --git a/expression/builtin.go b/expression/builtin.go index 0cc7756e38f25..9a90e81ebc0e1 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -309,7 +309,9 @@ type functionClass interface { getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) } -// funcs holds all registered builtin functions. +// funcs holds all registered builtin functions. When new function is added, +// check expression/function_traits.go to see if it should be appended to +// any set there. var funcs = map[string]functionClass{ // common functions ast.Coalesce: &coalesceFunctionClass{baseFunctionClass{ast.Coalesce, 1, -1}}, diff --git a/expression/constant_propagation.go b/expression/constant_propagation.go index ec69cf62ef05f..130417a6dee88 100644 --- a/expression/constant_propagation.go +++ b/expression/constant_propagation.go @@ -300,6 +300,9 @@ type propOuterJoinConstSolver struct { filterConds []Expression outerSchema *Schema innerSchema *Schema + // nullSensitive indicates if this outer join is null sensitive, if true, we cannot generate + // additional `col is not null` condition from column equal conditions. + nullSensitive bool } func (s *propOuterJoinConstSolver) setConds2ConstFalse(filterConds bool) { @@ -461,7 +464,7 @@ func (s *propOuterJoinConstSolver) deriveConds(outerCol, innerCol *Column, schem // 'expression(..., outerCol, ...)' does not reference columns outside children schemas of join node. // Derived new expressions must be appended into join condition, not filter condition. 
func (s *propOuterJoinConstSolver) propagateColumnEQ() { - visited := make([]bool, len(s.joinConds)+len(s.filterConds)) + visited := make([]bool, 2*len(s.joinConds)+len(s.filterConds)) s.unionSet = disjointset.NewIntSet(len(s.columns)) var outerCol, innerCol *Column // Only consider column equal condition in joinConds. @@ -473,6 +476,22 @@ func (s *propOuterJoinConstSolver) propagateColumnEQ() { innerID := s.getColID(innerCol) s.unionSet.Union(outerID, innerID) visited[i] = true + // Generate `innerCol is not null` from `outerCol = innerCol`. Note that `outerCol is not null` + // does not hold since we are in outer join. + // For AntiLeftOuterSemiJoin, this does not work, for example: + // `select *, t1.a not in (select t2.b from t t2) from t t1` does not imply `t2.b is not null`. + // For LeftOuterSemiJoin, this does not work either, for example: + // `select *, t1.a in (select t2.b from t t2) from t t1` + // rows with t2.b is null would impact whether LeftOuterSemiJoin should output 0 or null if there + // is no row satisfying t2.b = t1.a + if s.nullSensitive { + continue + } + childCol := s.innerSchema.RetrieveColumn(innerCol) + if !mysql.HasNotNullFlag(childCol.RetType.Flag) { + notNullExpr := BuildNotNullExpr(s.ctx, childCol) + s.joinConds = append(s.joinConds, notNullExpr) + } } } lenJoinConds := len(s.joinConds) @@ -538,10 +557,12 @@ func propagateConstantDNF(ctx sessionctx.Context, conds []Expression) []Expressi // Second step is to extract `outerCol = innerCol` from join conditions, and derive new join // conditions based on this column equal condition and `outerCol` related // expressions in join conditions and filter conditions; -func PropConstOverOuterJoin(ctx sessionctx.Context, joinConds, filterConds []Expression, outerSchema, innerSchema *Schema) ([]Expression, []Expression) { +func PropConstOverOuterJoin(ctx sessionctx.Context, joinConds, filterConds []Expression, + outerSchema, innerSchema *Schema, nullSensitive bool) ([]Expression, []Expression) { 
solver := &propOuterJoinConstSolver{ - outerSchema: outerSchema, - innerSchema: innerSchema, + outerSchema: outerSchema, + innerSchema: innerSchema, + nullSensitive: nullSensitive, } solver.colMapper = make(map[int64]int) solver.ctx = ctx diff --git a/expression/constant_propagation_test.go b/expression/constant_propagation_test.go index a9d2b49ff9e19..f1727d9356587 100644 --- a/expression/constant_propagation_test.go +++ b/expression/constant_propagation_test.go @@ -87,7 +87,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { "├─TableReader_8 10000.00 root data:TableScan_7", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 3333.33 root data:Selection_10", - " └─Selection_10 3333.33 cop gt(test.t2.a, 1)", + " └─Selection_10 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))", " └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", )) tk.MustQuery("explain select * from t1 left join t2 on t1.a = t2.a where t1.a > 1;").Check(testkit.Rows( @@ -96,7 +96,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { "│ └─Selection_9 3333.33 cop gt(test.t1.a, 1)", "│ └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_13 3333.33 root data:Selection_12", - " └─Selection_12 3333.33 cop gt(test.t2.a, 1)", + " └─Selection_12 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))", " └─TableScan_11 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", )) tk.MustQuery("explain select * from t1 right join t2 on t1.a > t2.a where t2.a = 1;").Check(testkit.Rows( @@ -111,7 +111,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a where t2.a > 1;").Check(testkit.Rows( "HashRightJoin_7 4166.67 root right outer join, inner:TableReader_10, equal:[eq(test.t1.a, test.t2.a)]", "├─TableReader_10 3333.33 root data:Selection_9", - "│ └─Selection_9 3333.33 
cop gt(test.t1.a, 1)", + "│ └─Selection_9 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))", "│ └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_13 3333.33 root data:Selection_12", " └─Selection_12 3333.33 cop gt(test.t2.a, 1)", @@ -120,7 +120,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a and t2.a > 1;").Check(testkit.Rows( "HashRightJoin_6 10000.00 root right outer join, inner:TableReader_9, equal:[eq(test.t1.a, test.t2.a)], right cond:gt(test.t2.a, 1)", "├─TableReader_9 3333.33 root data:Selection_8", - "│ └─Selection_8 3333.33 cop gt(test.t1.a, 1)", + "│ └─Selection_8 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 10000.00 root data:TableScan_10", " └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", @@ -139,7 +139,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { "├─TableReader_8 10000.00 root data:TableScan_7", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 3333.33 root data:Selection_10", - " └─Selection_10 3333.33 cop gt(test.t2.a, 1)", + " └─Selection_10 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))", " └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", )) tk.MustQuery("explain select * from t1 left join t2 on t1.a > t2.a and t2.a = 1;").Check(testkit.Rows( @@ -147,13 +147,13 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { "├─TableReader_8 10000.00 root data:TableScan_7", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 10.00 root data:Selection_10", - " └─Selection_10 10.00 cop eq(test.t2.a, 1)", + " └─Selection_10 10.00 cop eq(test.t2.a, 1), not(isnull(test.t2.a))", " └─TableScan_9 10000.00 
cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", )) tk.MustQuery("explain select * from t1 right join t2 on t1.a > t2.a and t1.a = 1;").Check(testkit.Rows( "HashRightJoin_6 100000.00 root right outer join, inner:TableReader_9, other cond:gt(test.t1.a, test.t2.a)", "├─TableReader_9 10.00 root data:Selection_8", - "│ └─Selection_8 10.00 cop eq(test.t1.a, 1)", + "│ └─Selection_8 10.00 cop eq(test.t1.a, 1), not(isnull(test.t1.a))", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 10000.00 root data:TableScan_10", " └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", @@ -161,7 +161,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a and t1.a > 1;").Check(testkit.Rows( "HashRightJoin_6 10000.00 root right outer join, inner:TableReader_9, equal:[eq(test.t1.a, test.t2.a)]", "├─TableReader_9 3333.33 root data:Selection_8", - "│ └─Selection_8 3333.33 cop gt(test.t1.a, 1)", + "│ └─Selection_8 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))", "│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_11 10000.00 root data:TableScan_10", " └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", @@ -197,7 +197,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) { tk.MustQuery("explain select * from t1 left join t2 on true where t1.a = 1 and t1.a = 1;").Check(testkit.Rows( "HashLeftJoin_7 80000.00 root left outer join, inner:TableReader_12", "├─TableReader_10 10.00 root data:Selection_9", - "│ └─Selection_9 10.00 cop eq(test.t1.a, 1), eq(test.t1.a, 1)", + "│ └─Selection_9 10.00 cop eq(test.t1.a, 1)", "│ └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─TableReader_12 10000.00 root data:TableScan_11", " └─TableScan_11 10000.00 cop table:t2, range:[-inf,+inf], keep 
order:false, stats:pseudo", diff --git a/expression/function_traits.go b/expression/function_traits.go index 24ffa391db6c0..c1c39b9309834 100644 --- a/expression/function_traits.go +++ b/expression/function_traits.go @@ -62,3 +62,44 @@ var DeferredFunctions = map[string]struct{}{ var inequalFunctions = map[string]struct{}{ ast.IsNull: {}, } + +// mutableEffectsFunctions stores functions which are mutable or have side effects, specifically, +// we cannot remove them from filter even if they have duplicates. +var mutableEffectsFunctions = map[string]struct{}{ + // Time related functions in MySQL have various behaviors when executed multiple times in a single SQL, + // for example: + // mysql> select current_timestamp(), sleep(5), current_timestamp(); + // +---------------------+----------+---------------------+ + // | current_timestamp() | sleep(5) | current_timestamp() | + // +---------------------+----------+---------------------+ + // | 2018-12-18 17:55:39 | 0 | 2018-12-18 17:55:39 | + // +---------------------+----------+---------------------+ + // while: + // mysql> select sysdate(), sleep(5), sysdate(); + // +---------------------+----------+---------------------+ + // | sysdate() | sleep(5) | sysdate() | + // +---------------------+----------+---------------------+ + // | 2018-12-18 17:57:38 | 0 | 2018-12-18 17:57:43 | + // +---------------------+----------+---------------------+ + // for safety consideration, treat them all as mutable. 
+ ast.Now: {}, + ast.CurrentTimestamp: {}, + ast.UTCTime: {}, + ast.Curtime: {}, + ast.CurrentTime: {}, + ast.UTCTimestamp: {}, + ast.UnixTimestamp: {}, + ast.Sysdate: {}, + ast.Curdate: {}, + ast.CurrentDate: {}, + ast.UTCDate: {}, + + ast.Rand: {}, + ast.RandomBytes: {}, + ast.UUID: {}, + ast.UUIDShort: {}, + ast.Sleep: {}, + ast.SetVar: {}, + ast.GetVar: {}, + ast.AnyValue: {}, +} diff --git a/expression/util.go b/expression/util.go index 66501b9e1cc0b..008d80598d2f0 100644 --- a/expression/util.go +++ b/expression/util.go @@ -587,3 +587,47 @@ func GetIntFromConstant(ctx sessionctx.Context, value Expression) (int, bool, er } return intNum, false, nil } + +// BuildNotNullExpr wraps up `not(isnull())` for given expression. +func BuildNotNullExpr(ctx sessionctx.Context, expr Expression) Expression { + isNull := NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), expr) + notNull := NewFunctionInternal(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), isNull) + return notNull +} + +// isMutableEffectsExpr checks if expr contains function which is mutable or has side effects. +func isMutableEffectsExpr(expr Expression) bool { + switch x := expr.(type) { + case *ScalarFunction: + if _, ok := mutableEffectsFunctions[x.FuncName.L]; ok { + return true + } + for _, arg := range x.GetArgs() { + if isMutableEffectsExpr(arg) { + return true + } + } + case *Column: + case *Constant: + if x.DeferredExpr != nil { + return isMutableEffectsExpr(x.DeferredExpr) + } + } + return false +} + +// RemoveDupExprs removes identical exprs. Note that if expr contains functions which +// are mutable or have side effects, we cannot remove it even if it has duplicates. 
+func RemoveDupExprs(ctx sessionctx.Context, exprs []Expression) []Expression { + res := make([]Expression, 0, len(exprs)) + exists := make(map[string]struct{}, len(exprs)) + sc := ctx.GetSessionVars().StmtCtx + for _, expr := range exprs { + key := string(expr.HashCode(sc)) + if _, ok := exists[key]; !ok || isMutableEffectsExpr(expr) { + res = append(res, expr) + exists[key] = struct{}{} + } + } + return res +} diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 5cf32c9b54a12..26c51b02264b0 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -112,11 +112,13 @@ func (s *testAnalyzeSuite) TestCBOWithoutAnalyze(c *C) { h.DumpStatsDeltaToKV(statistics.DumpAll) c.Assert(h.Update(dom.InfoSchema()), IsNil) testKit.MustQuery("explain select * from t1, t2 where t1.a = t2.a").Check(testkit.Rows( - "HashLeftJoin_8 7.50 root inner join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)]", - "├─TableReader_11 6.00 root data:TableScan_10", - "│ └─TableScan_10 6.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - "└─TableReader_13 6.00 root data:TableScan_12", - " └─TableScan_12 6.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", + "HashLeftJoin_8 7.49 root inner join, inner:TableReader_15, equal:[eq(test.t1.a, test.t2.a)]", + "├─TableReader_12 5.99 root data:Selection_11", + "│ └─Selection_11 5.99 cop not(isnull(test.t1.a))", + "│ └─TableScan_10 6.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─TableReader_15 5.99 root data:Selection_14", + " └─Selection_14 5.99 cop not(isnull(test.t2.a))", + " └─TableScan_13 6.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", )) } @@ -165,17 +167,19 @@ func (s *testAnalyzeSuite) TestStraightJoin(c *C) { )) testKit.MustQuery("explain select straight_join * from t1, t2, t3, t4 where t1.a=t4.a;").Check(testkit.Rows( - "HashLeftJoin_11 1250000000000.00 root inner join, inner:TableReader_24, equal:[eq(test.t1.a, 
test.t4.a)]", - "├─HashLeftJoin_13 1000000000000.00 root inner join, inner:TableReader_22", - "│ ├─HashLeftJoin_15 100000000.00 root inner join, inner:TableReader_20", - "│ │ ├─TableReader_18 10000.00 root data:TableScan_17", - "│ │ │ └─TableScan_17 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - "│ │ └─TableReader_20 10000.00 root data:TableScan_19", - "│ │ └─TableScan_19 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", - "│ └─TableReader_22 10000.00 root data:TableScan_21", - "│ └─TableScan_21 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo", - "└─TableReader_24 10000.00 root data:TableScan_23", - " └─TableScan_23 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo", + "HashLeftJoin_11 1248750000000.00 root inner join, inner:TableReader_26, equal:[eq(test.t1.a, test.t4.a)]", + "├─HashLeftJoin_13 999000000000.00 root inner join, inner:TableReader_23", + "│ ├─HashRightJoin_16 99900000.00 root inner join, inner:TableReader_19", + "│ │ ├─TableReader_19 9990.00 root data:Selection_18", + "│ │ │ └─Selection_18 9990.00 cop not(isnull(test.t1.a))", + "│ │ │ └─TableScan_17 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", + "│ │ └─TableReader_21 10000.00 root data:TableScan_20", + "│ │ └─TableScan_20 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo", + "│ └─TableReader_23 10000.00 root data:TableScan_22", + "│ └─TableScan_22 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─TableReader_26 9990.00 root data:Selection_25", + " └─Selection_25 9990.00 cop not(isnull(test.t4.a))", + " └─TableScan_24 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo", )) } @@ -409,11 +413,11 @@ func (s *testAnalyzeSuite) TestEmptyTable(c *C) { }, { sql: "select * from t where c1 in (select c1 from t1)", - best: 
"RightHashJoin{TableReader(Table(t1)->HashAgg)->HashAgg->TableReader(Table(t))}(test.t1.c1,test.t.c1)->Projection", + best: "RightHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))])->HashAgg)->HashAgg->TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))}(test.t1.c1,test.t.c1)->Projection", }, { sql: "select * from t, t1 where t.c1 = t1.c1", - best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1))}(test.t.c1,test.t1.c1)", + best: "LeftHashJoin{TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))->TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))]))}(test.t.c1,test.t1.c1)", }, { sql: "select * from t limit 0", @@ -690,10 +694,10 @@ func (s *testAnalyzeSuite) TestCorrelatedEstimation(c *C) { " └─StreamAgg_20 1.00 root funcs:count(1)", " └─HashLeftJoin_21 1.00 root inner join, inner:TableReader_28, equal:[eq(s.a, t1.a)]", " ├─TableReader_25 1.00 root data:Selection_24", - " │ └─Selection_24 1.00 cop eq(s.a, test.t.a)", + " │ └─Selection_24 1.00 cop eq(s.a, test.t.a), not(isnull(s.a))", " │ └─TableScan_23 10.00 cop table:s, range:[-inf,+inf], keep order:false", " └─TableReader_28 1.00 root data:Selection_27", - " └─Selection_27 1.00 cop eq(t1.a, test.t.a)", + " └─Selection_27 1.00 cop eq(t1.a, test.t.a), not(isnull(t1.a))", " └─TableScan_26 10.00 cop table:t1, range:[-inf,+inf], keep order:false", )) tk.MustQuery("explain select (select concat(t1.a, \",\", t1.b) from t t1 where t1.a=t.a and t1.c=t.c) from t"). diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 2a4af05983ace..538a9a266bbcd 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -648,17 +648,11 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, for _, col := range agg.schema.Columns { col.IsReferenced = true } - eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg, false, false) // Build inner join above the aggregation. 
- join := LogicalJoin{ - JoinType: InnerJoin, - EqualConditions: eq, - LeftConditions: left, - RightConditions: right, - OtherConditions: other, - }.Init(er.ctx) + join := LogicalJoin{JoinType: InnerJoin}.Init(er.ctx) join.SetChildren(er.p, agg) join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema)) + join.attachOnConds(expression.SplitCNFItems(checkCondition)) // Set join hint for this join. if er.b.TableHints() != nil { er.err = join.setPreferredJoinType(er.b.TableHints()) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 72aa76dddb3e4..be68b36b92daf 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -180,21 +180,45 @@ func (b *PlanBuilder) buildResultSetNode(node ast.ResultSetNode) (p LogicalPlan, // extractOnCondition divide conditions in CNF of join node into 4 groups. // These conditions can be where conditions, join conditions, or collection of both. // If deriveLeft/deriveRight is set, we would try to derive more conditions for left/right plan. 
-func extractOnCondition(conditions []expression.Expression, left LogicalPlan, right LogicalPlan, - deriveLeft bool, deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression, +func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, deriveLeft bool, + deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression, rightCond []expression.Expression, otherCond []expression.Expression) { + left, right := p.children[0], p.children[1] for _, expr := range conditions { binop, ok := expr.(*expression.ScalarFunction) - if ok && binop.FuncName.L == ast.EQ { - ln, lOK := binop.GetArgs()[0].(*expression.Column) - rn, rOK := binop.GetArgs()[1].(*expression.Column) + if ok && len(binop.GetArgs()) == 2 { + ctx := binop.GetCtx() + arg0, lOK := binop.GetArgs()[0].(*expression.Column) + arg1, rOK := binop.GetArgs()[1].(*expression.Column) if lOK && rOK { - if left.Schema().Contains(ln) && right.Schema().Contains(rn) { - eqCond = append(eqCond, binop) - continue + var leftCol, rightCol *expression.Column + if left.Schema().Contains(arg0) && right.Schema().Contains(arg1) { + leftCol, rightCol = arg0, arg1 + } + if leftCol == nil && left.Schema().Contains(arg1) && right.Schema().Contains(arg0) { + leftCol, rightCol = arg1, arg0 + } + if leftCol != nil { + // Do not derive `is not null` for anti join, since it may result in wrong results. 
+ // For example: + // `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`, + // `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b)` does not imply `t1.b is not null`, + // `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`. + if deriveLeft && p.JoinType != AntiSemiJoin && p.JoinType != AntiLeftOuterSemiJoin { + if isNullRejected(ctx, left.Schema(), expr) && !mysql.HasNotNullFlag(leftCol.RetType.Flag) { + notNullExpr := expression.BuildNotNullExpr(ctx, leftCol) + leftCond = append(leftCond, notNullExpr) + } + } + if deriveRight && p.JoinType != AntiSemiJoin && p.JoinType != AntiLeftOuterSemiJoin { + if isNullRejected(ctx, right.Schema(), expr) && !mysql.HasNotNullFlag(rightCol.RetType.Flag) { + notNullExpr := expression.BuildNotNullExpr(ctx, rightCol) + rightCond = append(rightCond, notNullExpr) + } + } } - if left.Schema().Contains(rn) && right.Schema().Contains(ln) { - cond := expression.NewFunctionInternal(binop.GetCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), rn, ln) + if leftCol != nil && binop.FuncName.L == ast.EQ { + cond := expression.NewFunctionInternal(ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), leftCol, rightCol) eqCond = append(eqCond, cond.(*expression.ScalarFunction)) continue } diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 1ea9f82e35dd2..1b87cbd8f2422 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -285,6 +285,12 @@ func (s *testPlanSuite) TestJoinPredicatePushDown(c *C) { left: "[]", right: "[or(or(eq(t2.a, 3), eq(t2.a, 4)), eq(t2.a, 2))]", }, + // Duplicate condition would be removed. 
+ { + sql: "select * from t t1 join t t2 on t1.a > 1 and t1.a > 1", + left: "[gt(t1.a, 1)]", + right: "[]", + }, } for _, ca := range tests { comment := Commentf("for %s", ca.sql) @@ -449,6 +455,134 @@ func (s *testPlanSuite) TestAntiSemiJoinConstFalse(c *C) { } } +func (s *testPlanSuite) TestDeriveNotNullConds(c *C) { + defer testleak.AfterTest(c)() + tests := []struct { + sql string + plan string + left string + right string + }{ + { + sql: "select * from t t1 inner join t t2 on t1.e = t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection", + left: "[not(isnull(t1.e))]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 inner join t t2 on t1.e > t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[not(isnull(t1.e))]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 inner join t t2 on t1.e = t2.e and t1.e is not null", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection", + left: "[not(isnull(t1.e))]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 left join t t2 on t1.e = t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection", + left: "[]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 left join t t2 on t1.e > t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 left join t t2 on t1.e = t2.e and t2.e is not null", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection", + left: "[]", + right: "[not(isnull(t2.e))]", + }, + { + sql: "select * from t t1 right join t t2 on t1.e = t2.e and t1.e is not null", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection", + left: "[not(isnull(t1.e))]", + right: "[]", + }, + { + sql: "select * from t t1 inner join t t2 on t1.e <=> t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[]", + right: "[]", + }, + { + sql: "select * from t t1 left join t 
t2 on t1.e <=> t2.e", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[]", + right: "[]", + }, + // Not deriving if column has NotNull flag already. + { + sql: "select * from t t1 inner join t t2 on t1.b = t2.b", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection", + left: "[]", + right: "[]", + }, + { + sql: "select * from t t1 left join t t2 on t1.b = t2.b", + plan: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection", + left: "[]", + right: "[]", + }, + { + sql: "select * from t t1 left join t t2 on t1.b > t2.b", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[]", + right: "[]", + }, + // Not deriving for AntiSemiJoin + { + sql: "select * from t t1 where not exists (select * from t t2 where t2.e = t1.e)", + plan: "Join{DataScan(t1)->DataScan(t2)}->Projection", + left: "[]", + right: "[]", + }, + } + for _, ca := range tests { + comment := Commentf("for %s", ca.sql) + stmt, err := s.ParseOneStmt(ca.sql, "", "") + c.Assert(err, IsNil, comment) + p, err := BuildLogicalPlan(s.ctx, stmt, s.is) + c.Assert(err, IsNil, comment) + p, err = logicalOptimize(flagPredicatePushDown|flagPrunColumns, p.(LogicalPlan)) + c.Assert(err, IsNil, comment) + c.Assert(ToString(p), Equals, ca.plan, comment) + join := p.(LogicalPlan).Children()[0].(*LogicalJoin) + left := join.Children()[0].(*DataSource) + right := join.Children()[1].(*DataSource) + leftConds := fmt.Sprintf("%s", left.pushedDownConds) + rightConds := fmt.Sprintf("%s", right.pushedDownConds) + c.Assert(leftConds, Equals, ca.left, comment) + c.Assert(rightConds, Equals, ca.right, comment) + } +} + +func (s *testPlanSuite) TestDupRandJoinCondsPushDown(c *C) { + sql := "select * from t as t1 join t t2 on t1.a > rand() and t1.a > rand()" + comment := Commentf("for %s", sql) + stmt, err := s.ParseOneStmt(sql, "", "") + c.Assert(err, IsNil, comment) + p, err := BuildLogicalPlan(s.ctx, stmt, s.is) + c.Assert(err, IsNil, comment) + p, err = 
logicalOptimize(flagPredicatePushDown, p.(LogicalPlan)) + c.Assert(err, IsNil, comment) + proj, ok := p.(*LogicalProjection) + c.Assert(ok, IsTrue, comment) + join, ok := proj.children[0].(*LogicalJoin) + c.Assert(ok, IsTrue, comment) + leftPlan, ok := join.children[0].(*LogicalSelection) + c.Assert(ok, IsTrue, comment) + leftCond := fmt.Sprintf("%s", leftPlan.Conditions) + // Condition with mutable function cannot be de-duplicated when push down join conds. + c.Assert(leftCond, Equals, "[gt(cast(t1.a), rand()) gt(cast(t1.a), rand())]", comment) +} + func (s *testPlanSuite) TestTablePartition(c *C) { defer testleak.AfterTest(c)() definitions := []model.PartitionDefinition{ diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index aa2cc510b67c9..af5d2c5fcefc7 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -146,7 +146,7 @@ func (p *LogicalJoin) columnSubstitute(schema *expression.Schema, exprs []expres } func (p *LogicalJoin) attachOnConds(onConds []expression.Expression) { - eq, left, right, other := extractOnCondition(onConds, p.children[0].(LogicalPlan), p.children[1].(LogicalPlan), false, false) + eq, left, right, other := p.extractOnCondition(onConds, false, false) p.EqualConditions = append(eq, p.EqualConditions...) p.LeftConditions = append(left, p.LeftConditions...) p.RightConditions = append(right, p.RightConditions...) diff --git a/planner/core/rule_predicate_push_down.go b/planner/core/rule_predicate_push_down.go index bf352289da908..35e4538b63fe7 100644 --- a/planner/core/rule_predicate_push_down.go +++ b/planner/core/rule_predicate_push_down.go @@ -111,8 +111,6 @@ func (p *LogicalTableDual) PredicatePushDown(predicates []expression.Expression) // PredicatePushDown implements LogicalPlan PredicatePushDown interface. 
func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan) { simplifyOuterJoin(p, predicates) - leftPlan := p.children[0] - rightPlan := p.children[1] var equalCond []*expression.ScalarFunction var leftPushCond, rightPushCond, otherCond, leftCond, rightCond []expression.Expression switch p.JoinType { @@ -125,7 +123,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret // Handle where conditions predicates = expression.ExtractFiltersFromDNFs(p.ctx, predicates) // Only derive left where condition, because right where condition cannot be pushed down - equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(predicates, leftPlan, rightPlan, true, false) + equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, true, false) leftCond = leftPushCond // Handle join conditions, only derive right join condition, because left join condition cannot be pushed down _, derivedRightJoinCond := deriveOtherConditions(p, false, true) @@ -142,7 +140,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret // Handle where conditions predicates = expression.ExtractFiltersFromDNFs(p.ctx, predicates) // Only derive right where condition, because left where condition cannot be pushed down - equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(predicates, leftPlan, rightPlan, false, true) + equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, false, true) rightCond = rightPushCond // Handle join conditions, only derive left join condition, because right join condition cannot be pushed down derivedLeftJoinCond, _ := deriveOtherConditions(p, true, false) @@ -167,7 +165,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret return ret, dual } } - equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(tempCond, leftPlan, 
rightPlan, true, true) + equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true) p.LeftConditions = nil p.RightConditions = nil p.EqualConditions = equalCond @@ -175,8 +173,10 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret leftCond = leftPushCond rightCond = rightPushCond } - leftRet, lCh := leftPlan.PredicatePushDown(leftCond) - rightRet, rCh := rightPlan.PredicatePushDown(rightCond) + leftCond = expression.RemoveDupExprs(p.ctx, leftCond) + rightCond = expression.RemoveDupExprs(p.ctx, rightCond) + leftRet, lCh := p.children[0].PredicatePushDown(leftCond) + rightRet, rCh := p.children[1].PredicatePushDown(rightCond) addSelection(p, lCh, leftRet, 0) addSelection(p, rCh, rightRet, 1) p.updateEQCond() @@ -413,17 +413,50 @@ func deriveOtherConditions(p *LogicalJoin, deriveLeft bool, deriveRight bool) (l if leftRelaxedCond != nil { leftCond = append(leftCond, leftRelaxedCond) } + notNullExpr := deriveNotNullExpr(expr, leftPlan.Schema()) + if notNullExpr != nil { + leftCond = append(leftCond, notNullExpr) + } } if deriveRight { rightRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(expr, rightPlan.Schema()) if rightRelaxedCond != nil { rightCond = append(rightCond, rightRelaxedCond) } + notNullExpr := deriveNotNullExpr(expr, rightPlan.Schema()) + if notNullExpr != nil { + rightCond = append(rightCond, notNullExpr) + } } } return } +// deriveNotNullExpr generates a new expression `not(isnull(col))` given `col1 op col2`, +// in which `col` is in specified schema. Caller guarantees that only one of `col1` or +// `col2` is in schema. This is only called for `OtherConditions` of outer join now, +// so it is safe even if join type is LeftOuterSemiJoin or AntiLeftOuterSemiJoin. 
+func deriveNotNullExpr(expr expression.Expression, schema *expression.Schema) expression.Expression {
+	binop, ok := expr.(*expression.ScalarFunction)
+	if !ok || len(binop.GetArgs()) != 2 { // only two-argument scalar functions are candidates
+		return nil
+	}
+	ctx := binop.GetCtx()
+	arg0, lOK := binop.GetArgs()[0].(*expression.Column)
+	arg1, rOK := binop.GetArgs()[1].(*expression.Column)
+	if !lOK || !rOK { // both operands must be plain columns, i.e. `col op col`
+		return nil
+	}
+	childCol := schema.RetrieveColumn(arg0)
+	if childCol == nil { // arg0 is not in schema: fall back to arg1, which may be missing as well
+		childCol = schema.RetrieveColumn(arg1)
+	}
+	if childCol != nil && isNullRejected(ctx, schema, expr) && !mysql.HasNotNullFlag(childCol.RetType.Flag) { // nil guard: derive nothing instead of panicking when neither operand is in schema
+		return expression.BuildNotNullExpr(ctx, childCol)
+	}
+	return nil
+}
+
 // conds2TableDual builds a LogicalTableDual if cond is constant false or null.
 func conds2TableDual(p LogicalPlan, conds []expression.Expression) LogicalPlan {
 	if len(conds) != 1 {
@@ -461,7 +494,8 @@ func (p *LogicalJoin) outerJoinPropConst(predicates []expression.Expression) []e
 	p.LeftConditions = nil
 	p.RightConditions = nil
 	p.OtherConditions = nil
-	joinConds, predicates = expression.PropConstOverOuterJoin(p.ctx, joinConds, predicates, outerTable.Schema(), innerTable.Schema())
+	nullSensitive := (p.JoinType == AntiLeftOuterSemiJoin || p.JoinType == LeftOuterSemiJoin)
+	joinConds, predicates = expression.PropConstOverOuterJoin(p.ctx, joinConds, predicates, outerTable.Schema(), innerTable.Schema(), nullSensitive)
 	p.attachOnConds(joinConds)
 	return predicates
 }