From d61a4eff092ccdbdc8dacf8e0ca4c767d803ad16 Mon Sep 17 00:00:00 2001
From: Kenan Yao <cauchy1992@gmail.com>
Date: Wed, 5 Dec 2018 17:23:18 +0800
Subject: [PATCH] plan: derive `col is not null` from `col op col` conditions

Check whether the `op` condition is null-rejecting; if so, derive
`col is not null` for both sides of an inner join, and for the inner
side of an outer join.

Also, remove duplicate conditions before pushing them down. Check
if the condition is mutable before removing duplicates.

For AntiSemiJoin, AntiLeftOuterSemiJoin and LeftOuterSemiJoin, do
not generate new `is not null` conditions.
---
 cmd/explaintest/r/explain_complex.result      |  14 +-
 .../r/explain_complex_stats.result            |  14 +-
 cmd/explaintest/r/explain_easy.result         |  93 ++++++------
 cmd/explaintest/r/explain_easy_stats.result   |  28 ++--
 cmd/explaintest/r/select.result               |  20 +--
 cmd/explaintest/r/topn_push_down.result       |  29 ++--
 executor/index_lookup_join_test.go            |  61 ++++----
 executor/join_test.go                         |  48 ++-----
 expression/builtin.go                         |   4 +-
 expression/constant_propagation.go            |  29 +++-
 expression/constant_propagation_test.go       |  18 +--
 expression/function_traits.go                 |  41 ++++++
 expression/util.go                            |  44 ++++++
 planner/core/cbo_test.go                      |  44 +++---
 planner/core/expression_rewriter.go           |  10 +-
 planner/core/logical_plan_builder.go          |  44 ++++--
 planner/core/logical_plan_test.go             | 134 ++++++++++++++++++
 planner/core/logical_plans.go                 |   2 +-
 planner/core/rule_predicate_push_down.go      |  50 +++++--
 19 files changed, 506 insertions(+), 221 deletions(-)

diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result
index 2b43ae3a2357a..3dd044a6a89be 100644
--- a/cmd/explaintest/r/explain_complex.result
+++ b/cmd/explaintest/r/explain_complex.result
@@ -121,11 +121,11 @@ Projection_13	1.00	root	gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test.d
     └─IndexJoin_24	0.00	root	inner join, inner:IndexLookUp_23, outer key:gad.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, gad.ip), gt(test.dd.t, gad.t)
       ├─IndexLookUp_23	0.00	root	
       │ ├─IndexScan_20	10.00	cop	table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false, stats:pseudo
-      │ └─Selection_22	0.00	cop	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908)
+      │ └─Selection_22	0.00	cop	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
       │   └─TableScan_21	10.00	cop	table:dd, keep order:false, stats:pseudo
       └─IndexLookUp_33	3.33	root	
         ├─IndexScan_30	3333.33	cop	table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
-        └─Selection_32	3.33	cop	eq(gad.pt, "android")
+        └─Selection_32	3.33	cop	eq(gad.pt, "android"), not(isnull(gad.ip))
           └─TableScan_31	3333.33	cop	table:st, keep order:false, stats:pseudo
 explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
 id	count	task	operator info
@@ -134,11 +134,11 @@ Projection_10	0.00	root	gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk.t,
   └─IndexJoin_18	0.00	root	inner join, inner:IndexLookUp_17, outer key:gad.aid, inner key:sdk.aid, other cond:eq(gad.dic, sdk.mac), lt(gad.t, sdk.t)
     ├─IndexLookUp_27	0.00	root	
     │ ├─IndexScan_24	3333.33	cop	table:gad, index:t, range:(1477971479,+inf], keep order:false, stats:pseudo
-    │ └─Selection_26	0.00	cop	eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios")
+    │ └─Selection_26	0.00	cop	eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), not(isnull(gad.dic))
     │   └─TableScan_25	3333.33	cop	table:st, keep order:false, stats:pseudo
     └─IndexLookUp_17	0.00	root	
       ├─IndexScan_14	10.00	cop	table:sdk, index:aid, dic, range: decided by [gad.aid gad.dic], keep order:false, stats:pseudo
-      └─Selection_16	0.00	cop	eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479)
+      └─Selection_16	0.00	cop	eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t))
         └─TableScan_15	10.00	cop	table:dd, keep order:false, stats:pseudo
 explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
 id	count	task	operator info
@@ -153,9 +153,9 @@ id	count	task	operator info
 Projection_10	0.00	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
 └─Limit_13	0.00	root	offset:0, count:2000
   └─IndexJoin_19	0.00	root	inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
-    ├─TableReader_43	0.00	root	data:Selection_42
-    │ └─Selection_42	0.00	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592)
-    │   └─TableScan_41	10000.00	cop	table:dt, range:[0,+inf], keep order:false, stats:pseudo
+    ├─TableReader_45	0.00	root	data:Selection_44
+    │ └─Selection_44	0.00	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
+    │   └─TableScan_43	10000.00	cop	table:dt, range:[0,+inf], keep order:false, stats:pseudo
     └─IndexLookUp_18	3.33	root	
       ├─IndexScan_15	10.00	cop	table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo
       └─Selection_17	3.33	cop	eq(rr.pt, "ios"), gt(rr.t, 1478185592)
diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
index 1569a2d3a850c..13935717900f5 100644
--- a/cmd/explaintest/r/explain_complex_stats.result
+++ b/cmd/explaintest/r/explain_complex_stats.result
@@ -130,11 +130,11 @@ Projection_13	424.00	root	gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test
   └─HashAgg_19	424.00	root	group by:gad.aid, test.dd.dic, funcs:firstrow(gad.id), firstrow(gad.aid), firstrow(gad.cm), firstrow(gad.p1), firstrow(gad.p2), firstrow(gad.p3), firstrow(gad.p4), firstrow(gad.p5), firstrow(gad.p6_md5), firstrow(gad.p7_md5), firstrow(gad.ext), firstrow(gad.t), firstrow(test.dd.id), firstrow(test.dd.dic), firstrow(test.dd.ip), firstrow(test.dd.t)
     └─IndexJoin_24	424.00	root	inner join, inner:IndexLookUp_23, outer key:gad.aid, inner key:test.dd.aid, other cond:eq(gad.ip, test.dd.ip), gt(test.dd.t, gad.t)
       ├─TableReader_29	424.00	root	data:Selection_28
-      │ └─Selection_28	424.00	cop	eq(gad.bm, 0), eq(gad.pt, "android"), gt(gad.t, 1478143908)
+      │ └─Selection_28	424.00	cop	eq(gad.bm, 0), eq(gad.pt, "android"), gt(gad.t, 1478143908), not(isnull(gad.ip))
       │   └─TableScan_27	1999.00	cop	table:gad, range:[0,+inf], keep order:false
       └─IndexLookUp_23	455.80	root	
         ├─IndexScan_20	1.00	cop	table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false
-        └─Selection_22	455.80	cop	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908)
+        └─Selection_22	455.80	cop	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
           └─TableScan_21	1.00	cop	table:dd, keep order:false
 explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
 id	count	task	operator info
@@ -142,11 +142,11 @@ Projection_10	170.34	root	gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk.
 └─Limit_13	170.34	root	offset:0, count:3000
   └─IndexJoin_18	170.34	root	inner join, inner:IndexLookUp_17, outer key:gad.aid, inner key:sdk.aid, other cond:eq(gad.dic, sdk.mac), lt(gad.t, sdk.t)
     ├─TableReader_23	170.34	root	data:Selection_22
-    │ └─Selection_22	170.34	cop	eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), gt(gad.t, 1477971479)
+    │ └─Selection_22	170.34	cop	eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), gt(gad.t, 1477971479), not(isnull(gad.dic))
     │   └─TableScan_21	1999.00	cop	table:gad, range:[0,+inf], keep order:false
     └─IndexLookUp_17	509.04	root	
       ├─IndexScan_14	1.00	cop	table:sdk, index:aid, dic, range: decided by [gad.aid gad.dic], keep order:false
-      └─Selection_16	509.04	cop	eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479)
+      └─Selection_16	509.04	cop	eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t))
         └─TableScan_15	1.00	cop	table:dd, keep order:false
 explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
 id	count	task	operator info
@@ -161,9 +161,9 @@ id	count	task	operator info
 Projection_10	428.32	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
 └─Limit_13	428.32	root	offset:0, count:2000
   └─IndexJoin_19	428.32	root	inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
-    ├─TableReader_43	428.32	root	data:Selection_42
-    │ └─Selection_42	428.32	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592)
-    │   └─TableScan_41	2000.00	cop	table:dt, range:[0,+inf], keep order:false
+    ├─TableReader_45	428.32	root	data:Selection_44
+    │ └─Selection_44	428.32	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
+    │   └─TableScan_43	2000.00	cop	table:dt, range:[0,+inf], keep order:false
     └─IndexLookUp_18	970.00	root	
       ├─IndexScan_15	1.00	cop	table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false
       └─Selection_17	970.00	cop	eq(rr.pt, "ios"), gt(rr.t, 1478185592)
diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result
index dca41dcc8e468..058b67576d477 100644
--- a/cmd/explaintest/r/explain_easy.result
+++ b/cmd/explaintest/r/explain_easy.result
@@ -42,12 +42,13 @@ IndexReader_9	10.00	root	index:IndexScan_8
 └─IndexScan_8	10.00	cop	table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo
 explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	count	task	operator info
-IndexJoin_11	4166.67	root	left outer join, inner:IndexLookUp_10, outer key:test.t1.c2, inner key:test.t2.c1
-├─TableReader_23	3333.33	root	data:TableScan_22
-│ └─TableScan_22	3333.33	cop	table:t1, range:(1,+inf], keep order:false, stats:pseudo
-└─IndexLookUp_10	10.00	root	
-  ├─IndexScan_8	10.00	cop	table:t2, index:c1, range: decided by [test.t1.c2], keep order:false, stats:pseudo
-  └─TableScan_9	10.00	cop	table:t2, keep order:false, stats:pseudo
+IndexJoin_12	4166.67	root	left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1
+├─TableReader_24	3333.33	root	data:TableScan_23
+│ └─TableScan_23	3333.33	cop	table:t1, range:(1,+inf], keep order:false, stats:pseudo
+└─IndexLookUp_11	0.00	root	
+  ├─Selection_10	0.00	cop	not(isnull(test.t2.c1))
+  │ └─IndexScan_8	10.00	cop	table:t2, index:c1, range: decided by [test.t1.c2], keep order:false, stats:pseudo
+  └─TableScan_9	0.00	cop	table:t2, keep order:false
 explain update t1 set t1.c2 = 2 where t1.c1 = 1;
 id	count	task	operator info
 Point_Get_1	1.00	root	table:t1, handle:1
@@ -58,12 +59,13 @@ IndexLookUp_9	10.00	root
 └─TableScan_8	10.00	cop	table:t1, keep order:false, stats:pseudo
 explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1;
 id	count	task	operator info
-Projection_11	10000.00	root	cast(join_agg_0)
-└─IndexJoin_14	10000.00	root	inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1
-  ├─HashAgg_21	8000.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
-  │ └─TableReader_22	8000.00	root	data:HashAgg_17
-  │   └─HashAgg_17	8000.00	cop	group by:b.c2, funcs:count(b.c2), firstrow(b.c2)
-  │     └─TableScan_20	10000.00	cop	table:b, range:[-inf,+inf], keep order:false, stats:pseudo
+Projection_11	9990.00	root	cast(join_agg_0)
+└─IndexJoin_14	9990.00	root	inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1
+  ├─HashAgg_22	7992.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
+  │ └─TableReader_23	7992.00	root	data:HashAgg_17
+  │   └─HashAgg_17	7992.00	cop	group by:b.c2, funcs:count(b.c2), firstrow(b.c2)
+  │     └─Selection_21	9990.00	cop	not(isnull(b.c2))
+  │       └─TableScan_20	10000.00	cop	table:b, range:[-inf,+inf], keep order:false, stats:pseudo
   └─TableReader_13	10.00	root	data:TableScan_12
     └─TableScan_12	10.00	cop	table:a, range: decided by [b.c2], keep order:false, stats:pseudo
 explain select * from t2 order by t2.c2 limit 0, 1;
@@ -94,12 +96,13 @@ StreamAgg_12	1.00	root	funcs:sum(5_aux_0)
     └─IndexScan_22	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
 explain select c1 from t1 where c1 in (select c2 from t2);
 id	count	task	operator info
-Projection_9	10000.00	root	test.t1.c1
-└─IndexJoin_12	10000.00	root	inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1
-  ├─HashAgg_19	8000.00	root	group by:col_1, funcs:firstrow(col_0)
-  │ └─TableReader_20	8000.00	root	data:HashAgg_15
-  │   └─HashAgg_15	8000.00	cop	group by:test.t2.c2, funcs:firstrow(test.t2.c2)
-  │     └─TableScan_18	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
+Projection_9	9990.00	root	test.t1.c1
+└─IndexJoin_12	9990.00	root	inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1
+  ├─HashAgg_20	7992.00	root	group by:col_1, funcs:firstrow(col_0)
+  │ └─TableReader_21	7992.00	root	data:HashAgg_15
+  │   └─HashAgg_15	7992.00	cop	group by:test.t2.c2, funcs:firstrow(test.t2.c2)
+  │     └─Selection_19	9990.00	cop	not(isnull(test.t2.c2))
+  │       └─TableScan_18	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
   └─TableReader_11	10.00	root	data:TableScan_10
     └─TableScan_10	10.00	cop	table:t1, range: decided by [test.t2.c2], keep order:false, stats:pseudo
 explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1;
@@ -305,12 +308,13 @@ Projection_11	10000.00	root	9_aux_0
   ├─TableReader_15	10000.00	root	data:TableScan_14
   │ └─TableScan_14	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
   └─StreamAgg_20	1.00	root	funcs:count(1)
-    └─IndexJoin_33	12.50	root	inner join, inner:TableReader_32, outer key:s.c, inner key:t1.a
-      ├─IndexLookUp_38	10.00	root	
-      │ ├─IndexScan_36	10.00	cop	table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo
-      │ └─TableScan_37	10.00	cop	table:t, keep order:false, stats:pseudo
-      └─TableReader_32	10.00	root	data:TableScan_31
-        └─TableScan_31	10.00	cop	table:t1, range: decided by [s.c], keep order:false, stats:pseudo
+    └─IndexJoin_34	12.49	root	inner join, inner:TableReader_33, outer key:s.c, inner key:t1.a
+      ├─IndexLookUp_40	9.99	root	
+      │ ├─IndexScan_37	10.00	cop	table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo
+      │ └─Selection_39	9.99	cop	not(isnull(s.c))
+      │   └─TableScan_38	10.00	cop	table:t, keep order:false, stats:pseudo
+      └─TableReader_33	10.00	root	data:TableScan_32
+        └─TableScan_32	10.00	cop	table:t1, range: decided by [s.c], keep order:false, stats:pseudo
 drop table if exists t;
 create table t(a int unsigned);
 explain select t.a = '123455' from t;
@@ -356,13 +360,14 @@ id	count	task	operator info
 TableDual_5	0.00	root	rows:0
 explain select * from t t1 join t t2 where t1.b = t2.b and t2.b is null;
 id	count	task	operator info
-Projection_7	12.50	root	t1.a, t1.b, t2.a, t2.b
-└─HashRightJoin_9	12.50	root	inner join, inner:TableReader_12, equal:[eq(t2.b, t1.b)]
-  ├─TableReader_12	10.00	root	data:Selection_11
-  │ └─Selection_11	10.00	cop	isnull(t2.b)
+Projection_7	0.00	root	t1.a, t1.b, t2.a, t2.b
+└─HashRightJoin_9	0.00	root	inner join, inner:TableReader_12, equal:[eq(t2.b, t1.b)]
+  ├─TableReader_12	0.00	root	data:Selection_11
+  │ └─Selection_11	0.00	cop	isnull(t2.b), not(isnull(t2.b))
   │   └─TableScan_10	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
-  └─TableReader_14	10000.00	root	data:TableScan_13
-    └─TableScan_13	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─TableReader_15	9990.00	root	data:Selection_14
+    └─Selection_14	9990.00	cop	not(isnull(t1.b))
+      └─TableScan_13	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b);
 id	count	task	operator info
 HashLeftJoin_9	8000.00	root	anti semi join, inner:TableReader_13, equal:[eq(t1.b, t2.b)]
@@ -480,14 +485,14 @@ Projection_12	10000.00	root	9_aux_0
   ├─TableReader_16	10000.00	root	data:TableScan_15
   │ └─TableScan_15	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
   └─HashAgg_19	1.00	root	funcs:count(join_agg_0)
-    └─HashRightJoin_21	10.00	root	inner join, inner:HashAgg_27, equal:[eq(t1.a, s.a)]
-      ├─HashAgg_27	8.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
-      │ └─TableReader_28	8.00	root	data:HashAgg_22
-      │   └─HashAgg_22	8.00	cop	group by:t1.a, funcs:count(1), firstrow(t1.a)
-      │     └─Selection_26	10.00	cop	eq(t1.a, test.t.a)
+    └─HashRightJoin_21	9.99	root	inner join, inner:HashAgg_27, equal:[eq(t1.a, s.a)]
+      ├─HashAgg_27	7.99	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
+      │ └─TableReader_28	7.99	root	data:HashAgg_22
+      │   └─HashAgg_22	7.99	cop	group by:t1.a, funcs:count(1), firstrow(t1.a)
+      │     └─Selection_26	9.99	cop	eq(t1.a, test.t.a), not(isnull(t1.a))
       │       └─TableScan_25	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-      └─TableReader_34	10.00	root	data:Selection_33
-        └─Selection_33	10.00	cop	eq(s.a, test.t.a)
+      └─TableReader_34	9.99	root	data:Selection_33
+        └─Selection_33	9.99	cop	eq(s.a, test.t.a), not(isnull(s.a))
           └─TableScan_32	10000.00	cop	table:s, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select * from t ta left outer join t tb on ta.nb = tb.nb and ta.a > 1 where ifnull(tb.a, 1) or tb.a is null;
 id	count	task	operator info
@@ -514,14 +519,14 @@ Projection_14	10000.00	root	9_aux_0
   │ └─TableReader_19	10000.00	root	data:TableScan_18
   │   └─TableScan_18	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
   └─HashAgg_23	1.00	root	funcs:count(join_agg_0)
-    └─HashRightJoin_25	10.00	root	inner join, inner:HashAgg_31, equal:[eq(t1.a, s.a)]
-      ├─HashAgg_31	8.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
-      │ └─TableReader_32	8.00	root	data:HashAgg_26
-      │   └─HashAgg_26	8.00	cop	group by:t1.a, funcs:count(1), firstrow(t1.a)
-      │     └─Selection_30	10.00	cop	eq(t1.a, test.t.a)
+    └─HashRightJoin_25	9.99	root	inner join, inner:HashAgg_31, equal:[eq(t1.a, s.a)]
+      ├─HashAgg_31	7.99	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
+      │ └─TableReader_32	7.99	root	data:HashAgg_26
+      │   └─HashAgg_26	7.99	cop	group by:t1.a, funcs:count(1), firstrow(t1.a)
+      │     └─Selection_30	9.99	cop	eq(t1.a, test.t.a), not(isnull(t1.a))
       │       └─TableScan_29	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-      └─TableReader_38	10.00	root	data:Selection_37
-        └─Selection_37	10.00	cop	eq(s.a, test.t.a)
+      └─TableReader_38	9.99	root	data:Selection_37
+        └─Selection_37	9.99	cop	eq(s.a, test.t.a), not(isnull(s.a))
           └─TableScan_36	10000.00	cop	table:s, range:[-inf,+inf], keep order:false, stats:pseudo
 drop table if exists t;
 create table t(a int);
diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result
index 12cdb96b29f03..ef9d2082e1252 100644
--- a/cmd/explaintest/r/explain_easy_stats.result
+++ b/cmd/explaintest/r/explain_easy_stats.result
@@ -47,13 +47,13 @@ explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	count	task	operator info
 Projection_6	2481.25	root	test.t1.c1, test.t1.c2, test.t1.c3, test.t2.c1, test.t2.c2
 └─MergeJoin_7	2481.25	root	left outer join, left key:test.t1.c2, right key:test.t2.c1
-  ├─IndexLookUp_17	1998.00	root	
-  │ ├─Selection_16	1998.00	cop	gt(test.t1.c1, 1)
-  │ │ └─IndexScan_14	1999.00	cop	table:t1, index:c2, range:[NULL,+inf], keep order:true
-  │ └─TableScan_15	1998.00	cop	table:t1, keep order:false
-  └─IndexLookUp_21	1985.00	root	
-    ├─IndexScan_19	1985.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true
-    └─TableScan_20	1985.00	cop	table:t2, keep order:false
+  ├─IndexLookUp_18	1998.00	root	
+  │ ├─Selection_17	1998.00	cop	gt(test.t1.c1, 1)
+  │ │ └─IndexScan_15	1999.00	cop	table:t1, index:c2, range:[NULL,+inf], keep order:true
+  │ └─TableScan_16	1998.00	cop	table:t1, keep order:false
+  └─IndexLookUp_22	1985.00	root	
+    ├─IndexScan_20	1985.00	cop	table:t2, index:c1, range:[-inf,+inf], keep order:true
+    └─TableScan_21	1985.00	cop	table:t2, keep order:false
 explain update t1 set t1.c2 = 2 where t1.c1 = 1;
 id	count	task	operator info
 Point_Get_1	1.00	root	table:t1, handle:1
@@ -66,10 +66,11 @@ explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1;
 id	count	task	operator info
 Projection_11	1985.00	root	cast(join_agg_0)
 └─IndexJoin_14	1985.00	root	inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1
-  ├─HashAgg_21	1985.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
-  │ └─TableReader_22	1985.00	root	data:HashAgg_17
+  ├─HashAgg_22	1985.00	root	group by:col_2, funcs:count(col_0), firstrow(col_1)
+  │ └─TableReader_23	1985.00	root	data:HashAgg_17
   │   └─HashAgg_17	1985.00	cop	group by:b.c2, funcs:count(b.c2), firstrow(b.c2)
-  │     └─TableScan_20	1985.00	cop	table:b, range:[-inf,+inf], keep order:false
+  │     └─Selection_21	1985.00	cop	not(isnull(b.c2))
+  │       └─TableScan_20	1985.00	cop	table:b, range:[-inf,+inf], keep order:false
   └─TableReader_13	1.00	root	data:TableScan_12
     └─TableScan_12	1.00	cop	table:a, range: decided by [b.c2], keep order:false
 explain select * from t2 order by t2.c2 limit 0, 1;
@@ -94,10 +95,11 @@ explain select c1 from t1 where c1 in (select c2 from t2);
 id	count	task	operator info
 Projection_9	1985.00	root	test.t1.c1
 └─IndexJoin_12	1985.00	root	inner join, inner:TableReader_11, outer key:test.t2.c2, inner key:test.t1.c1
-  ├─HashAgg_19	1985.00	root	group by:col_1, funcs:firstrow(col_0)
-  │ └─TableReader_20	1985.00	root	data:HashAgg_15
+  ├─HashAgg_20	1985.00	root	group by:col_1, funcs:firstrow(col_0)
+  │ └─TableReader_21	1985.00	root	data:HashAgg_15
   │   └─HashAgg_15	1985.00	cop	group by:test.t2.c2, funcs:firstrow(test.t2.c2)
-  │     └─TableScan_18	1985.00	cop	table:t2, range:[-inf,+inf], keep order:false
+  │     └─Selection_19	1985.00	cop	not(isnull(test.t2.c2))
+  │       └─TableScan_18	1985.00	cop	table:t2, range:[-inf,+inf], keep order:false
   └─TableReader_11	1.00	root	data:TableScan_10
     └─TableScan_10	1.00	cop	table:t1, range: decided by [test.t2.c2], keep order:false
 explain select * from information_schema.columns;
diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result
index 8571aa46d1455..beaa3d8e1ba42 100644
--- a/cmd/explaintest/r/select.result
+++ b/cmd/explaintest/r/select.result
@@ -309,17 +309,19 @@ drop table if exists t;
 create table t (id int primary key, a int, b int);
 explain select * from (t t1 left join t t2 on t1.a = t2.a) left join (t t3 left join t t4 on t3.a = t4.a) on t2.b = 1;
 id	count	task	operator info
-HashLeftJoin_10	156250000.00	root	left outer join, inner:HashLeftJoin_16, left cond:[eq(t2.b, 1)]
-├─HashLeftJoin_11	12500.00	root	left outer join, inner:TableReader_15, equal:[eq(t1.a, t2.a)]
+HashLeftJoin_10	155937656.25	root	left outer join, inner:HashLeftJoin_17, left cond:[eq(t2.b, 1)]
+├─HashLeftJoin_11	12487.50	root	left outer join, inner:TableReader_16, equal:[eq(t1.a, t2.a)]
 │ ├─TableReader_13	10000.00	root	data:TableScan_12
 │ │ └─TableScan_12	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-│ └─TableReader_15	10000.00	root	data:TableScan_14
-│   └─TableScan_14	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
-└─HashLeftJoin_16	12500.00	root	left outer join, inner:TableReader_20, equal:[eq(t3.a, t4.a)]
-  ├─TableReader_18	10000.00	root	data:TableScan_17
-  │ └─TableScan_17	10000.00	cop	table:t3, range:[-inf,+inf], keep order:false, stats:pseudo
-  └─TableReader_20	10000.00	root	data:TableScan_19
-    └─TableScan_19	10000.00	cop	table:t4, range:[-inf,+inf], keep order:false, stats:pseudo
+│ └─TableReader_16	9990.00	root	data:Selection_15
+│   └─Selection_15	9990.00	cop	not(isnull(t2.a))
+│     └─TableScan_14	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
+└─HashLeftJoin_17	12487.50	root	left outer join, inner:TableReader_22, equal:[eq(t3.a, t4.a)]
+  ├─TableReader_19	10000.00	root	data:TableScan_18
+  │ └─TableScan_18	10000.00	cop	table:t3, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─TableReader_22	9990.00	root	data:Selection_21
+    └─Selection_21	9990.00	cop	not(isnull(t4.a))
+      └─TableScan_20	10000.00	cop	table:t4, range:[-inf,+inf], keep order:false, stats:pseudo
 drop table if exists t;
 create table t(a bigint primary key, b bigint);
 desc select * from t where a = 1;
diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result
index 4651bb89ac6b2..9941e82c75128 100644
--- a/cmd/explaintest/r/topn_push_down.result
+++ b/cmd/explaintest/r/topn_push_down.result
@@ -169,17 +169,18 @@ LIMIT 0, 5;
 id	count	task	operator info
 Projection_13	0.00	root	te.expect_time
 └─Limit_19	0.00	root	offset:0, count:5
-  └─IndexJoin_137	0.00	root	left outer join, inner:IndexReader_136, outer key:tr.id, inner key:p.relate_id
-    ├─TopN_140	0.00	root	te.expect_time:asc, offset:0, count:5
-    │ └─IndexJoin_35	0.00	root	inner join, inner:IndexLookUp_34, outer key:tr.id, inner key:te.trade_id
-    │   ├─IndexLookUp_105	0.00	root	
-    │   │ ├─Selection_103	0.00	cop	eq(tr.business_type, 18), in(tr.trade_type, 1)
-    │   │ │ └─IndexScan_101	10.00	cop	table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
-    │   │ └─Selection_104	0.00	cop	eq(tr.brand_identy, 32314), eq(tr.domain_type, 2)
-    │   │   └─TableScan_102	0.00	cop	table:tr, keep order:false
-    │   └─IndexLookUp_34	250.00	root	
-    │     ├─IndexScan_31	10.00	cop	table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo
-    │     └─Selection_33	250.00	cop	ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000)
-    │       └─TableScan_32	10.00	cop	table:te, keep order:false, stats:pseudo
-    └─IndexReader_136	10.00	root	index:IndexScan_135
-      └─IndexScan_135	10.00	cop	table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo
+  └─IndexJoin_142	0.00	root	left outer join, inner:IndexReader_141, outer key:tr.id, inner key:p.relate_id
+    ├─TopN_145	0.00	root	te.expect_time:asc, offset:0, count:5
+    │ └─IndexJoin_36	0.00	root	inner join, inner:IndexLookUp_35, outer key:tr.id, inner key:te.trade_id
+    │   ├─IndexLookUp_106	0.00	root	
+    │   │ ├─Selection_104	0.00	cop	eq(tr.business_type, 18), in(tr.trade_type, 1)
+    │   │ │ └─IndexScan_102	10.00	cop	table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
+    │   │ └─Selection_105	0.00	cop	eq(tr.brand_identy, 32314), eq(tr.domain_type, 2)
+    │   │   └─TableScan_103	0.00	cop	table:tr, keep order:false
+    │   └─IndexLookUp_35	250.00	root	
+    │     ├─IndexScan_32	10.00	cop	table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo
+    │     └─Selection_34	250.00	cop	ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000)
+    │       └─TableScan_33	10.00	cop	table:te, keep order:false, stats:pseudo
+    └─IndexReader_141	0.00	root	index:Selection_140
+      └─Selection_140	0.00	cop	not(isnull(p.relate_id))
+        └─IndexScan_139	10.00	cop	table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo
diff --git a/executor/index_lookup_join_test.go b/executor/index_lookup_join_test.go
index adeb966a1f764..9415e8ff9ffc9 100644
--- a/executor/index_lookup_join_test.go
+++ b/executor/index_lookup_join_test.go
@@ -48,10 +48,11 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) {
 	tk.MustExec("insert into t2 values(2,2,2), (3,3,3)")
 	// TableScan below UnionScan
 	tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.id").Check(testkit.Rows(
-		"IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.id",
-		"├─UnionScan_12 10000.00 root ",
-		"│ └─TableReader_14 10000.00 root data:TableScan_13",
-		"│   └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"IndexJoin_11 12487.50 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.id",
+		"├─UnionScan_12 9990.00 root not(isnull(test.t1.a))",
+		"│ └─TableReader_15 9990.00 root data:Selection_14",
+		"│   └─Selection_14 9990.00 cop not(isnull(test.t1.a))",
+		"│     └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─UnionScan_10 10.00 root ",
 		"  └─TableReader_9 10.00 root data:TableScan_8",
 		"    └─TableScan_8 10.00 cop table:t2, range: decided by [test.t1.a], keep order:false, stats:pseudo",
@@ -61,14 +62,16 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) {
 	))
 	// IndexLookUp below UnionScan
 	tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
-		"IndexJoin_12 12500.00 root inner join, inner:UnionScan_11, outer key:test.t1.a, inner key:test.t2.a",
-		"├─UnionScan_13 10000.00 root ",
-		"│ └─TableReader_15 10000.00 root data:TableScan_14",
-		"│   └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"└─UnionScan_11 10.00 root ",
-		"  └─IndexLookUp_10 10.00 root ",
-		"    ├─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
-		"    └─TableScan_9 10.00 cop table:t2, keep order:false, stats:pseudo",
+		"IndexJoin_13 12487.50 root inner join, inner:UnionScan_12, outer key:test.t1.a, inner key:test.t2.a",
+		"├─UnionScan_14 9990.00 root not(isnull(test.t1.a))",
+		"│ └─TableReader_17 9990.00 root data:Selection_16",
+		"│   └─Selection_16 9990.00 cop not(isnull(test.t1.a))",
+		"│     └─TableScan_15 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"└─UnionScan_12 0.00 root not(isnull(test.t2.a))",
+		"  └─IndexLookUp_11 0.00 root ",
+		"    ├─Selection_10 0.00 cop not(isnull(test.t2.a))",
+		"    │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
+		"    └─TableScan_9 0.00 cop table:t2, keep order:false",
 	))
 	tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
 		"2 2 2 2 2",
@@ -76,14 +79,16 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) {
 	))
 	// IndexScan below UnionScan
 	tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
-		"Projection_7 12500.00 root test.t1.a, test.t2.a",
-		"└─IndexJoin_11 12500.00 root inner join, inner:UnionScan_10, outer key:test.t1.a, inner key:test.t2.a",
-		"  ├─UnionScan_12 10000.00 root ",
-		"  │ └─TableReader_14 10000.00 root data:TableScan_13",
-		"  │   └─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"  └─UnionScan_10 10.00 root ",
-		"    └─IndexReader_9 10.00 root index:IndexScan_8",
-		"      └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
+		"Projection_7 12487.50 root test.t1.a, test.t2.a",
+		"└─IndexJoin_12 12487.50 root inner join, inner:UnionScan_11, outer key:test.t1.a, inner key:test.t2.a",
+		"  ├─UnionScan_13 9990.00 root not(isnull(test.t1.a))",
+		"  │ └─TableReader_16 9990.00 root data:Selection_15",
+		"  │   └─Selection_15 9990.00 cop not(isnull(test.t1.a))",
+		"  │     └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"  └─UnionScan_11 0.00 root not(isnull(test.t2.a))",
+		"    └─IndexReader_10 0.00 root index:Selection_9",
+		"      └─Selection_9 0.00 cop not(isnull(test.t2.a))",
+		"        └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
 		"2 2",
@@ -104,13 +109,15 @@ func (s *testSuite1) TestBatchIndexJoinUnionScan(c *C) {
 	tk.MustExec("insert into t2 values(1,1)")
 	tk.MustQuery("explain select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
 		"StreamAgg_13 1.00 root funcs:count(1)",
-		"└─IndexJoin_24 12500.00 root inner join, inner:UnionScan_23, outer key:test.t1.a, inner key:test.t2.a",
-		"  ├─UnionScan_25 10000.00 root ",
-		"  │ └─TableReader_27 10000.00 root data:TableScan_26",
-		"  │   └─TableScan_26 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"  └─UnionScan_23 10.00 root ",
-		"    └─IndexReader_22 10.00 root index:IndexScan_21",
-		"      └─IndexScan_21 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
+		"└─IndexJoin_27 12487.50 root inner join, inner:UnionScan_26, outer key:test.t1.a, inner key:test.t2.a",
+		"  ├─UnionScan_28 9990.00 root not(isnull(test.t1.a))",
+		"  │ └─TableReader_31 9990.00 root data:Selection_30",
+		"  │   └─Selection_30 9990.00 cop not(isnull(test.t1.a))",
+		"  │     └─TableScan_29 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"  └─UnionScan_26 0.00 root not(isnull(test.t2.a))",
+		"    └─IndexReader_25 0.00 root index:Selection_24",
+		"      └─Selection_24 0.00 cop not(isnull(test.t2.a))",
+		"        └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.id").Check(testkit.Rows(
 		"4",
diff --git a/executor/join_test.go b/executor/join_test.go
index e51fd86df9ab4..e55a1c62a5d71 100644
--- a/executor/join_test.go
+++ b/executor/join_test.go
@@ -944,47 +944,17 @@ func (s *testSuite2) TestHashJoin(c *C) {
 	tk.MustQuery("select count(*) from t2").Check(testkit.Rows("0"))
 	tk.MustExec("set @@tidb_max_chunk_size=1;")
 	result := tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where exists (select a from t2 where t1.a = t2.a);")
-	// id	count	task	operator info	execution info
-	// HashLeftJoin_9	8000.00	root	semi join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)]	time:1.036712ms, loops:1, rows:0
-	// ├─TableReader_11	10000.00	root	data:TableScan_10	time:441.096µs, loops:1, rows:1
-	// │ └─TableScan_10	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-	// └─TableReader_13	10000.00	root	data:TableScan_12	time:212.376µs, loops:1, rows:0
-	//   └─TableScan_12	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
+	// HashLeftJoin_9 7992.00 root semi join, inner:TableReader_15, equal:[eq(test.t1.a, test.t2.a)] time:219.863µs, loops:1, rows:0
+	// ├─TableReader_12 9990.00 root data:Selection_11 time:9.129µs, loops:1, rows:1
+	// │ └─Selection_11 9990.00 cop not(isnull(test.t1.a))
+	// │   └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo time:0s, loops:0, rows:5
+	// └─TableReader_15 9990.00 root data:Selection_14 time:12.983µs, loops:1, rows:0
+	//   └─Selection_14 9990.00 cop not(isnull(test.t2.a))
+	//     └─TableScan_13 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo time:0s, loops:0, rows:0
 	row := result.Rows()
-	c.Assert(len(row), Equals, 5)
+	c.Assert(len(row), Equals, 7)
 	outerExecInfo := row[1][4].(string)
 	c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "1")
-	innerExecInfo := row[3][4].(string)
+	innerExecInfo := row[4][4].(string)
 	c.Assert(innerExecInfo[len(innerExecInfo)-1:], Equals, "0")
-
-	tk.MustExec("insert into t2 select * from t1;")
-	tk.MustExec("delete from t1;")
-	tk.MustQuery("select count(*) from t1").Check(testkit.Rows("0"))
-	tk.MustQuery("select count(*) from t2").Check(testkit.Rows("5"))
-	result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where not exists (select a from t2 where t1.a = t2.a);")
-	// id	count	task	operator info	execution info
-	// HashLeftJoin_9	8000.00	root	anti semi join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)]	time:534.643µs, loops:1, rows:0
-	// ├─TableReader_11	10000.00	root	data:TableScan_10	time:35.042µs, loops:1, rows:0
-	// │ └─TableScan_10	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-	// └─TableReader_13	10000.00	root	data:TableScan_12	time:0s, loops:0, rows:0
-	//   └─TableScan_12	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
-	row = result.Rows()
-	c.Assert(len(row), Equals, 5)
-	outerExecInfo = row[1][4].(string)
-	c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0")
-	innerExecInfo = row[3][4].(string)
-	c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5")
-
-	result = tk.MustQuery("explain select /*+ TIDB_HJ(t1, t2) */ * from t1 left outer join t2 on t1.a = t2.a;")
-	// id	count	task	operator info	execution info
-	// HashLeftJoin_6	12500.00	root	left outer join, inner:TableReader_10, equal:[eq(test.t1.a, test.t2.a)]	time:502.553µs, loops:1, rows:0
-	// ├─TableReader_8	10000.00	root	data:TableScan_7	time:27.302µs, loops:1, rows:0
-	// │ └─TableScan_7	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
-	// └─TableReader_10	10000.00	root	data:TableScan_9	time:0s, loops:0, rows:0
-	//   └─TableScan_9	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
-	c.Assert(len(row), Equals, 5)
-	outerExecInfo = row[1][4].(string)
-	c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0")
-	innerExecInfo = row[3][4].(string)
-	c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5")
 }
diff --git a/expression/builtin.go b/expression/builtin.go
index 0cc7756e38f25..9a90e81ebc0e1 100644
--- a/expression/builtin.go
+++ b/expression/builtin.go
@@ -309,7 +309,9 @@ type functionClass interface {
 	getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error)
 }
 
-// funcs holds all registered builtin functions.
+// funcs holds all registered builtin functions. When a new function is added,
+// check expression/function_traits.go to see if it should be appended to
+// any set there.
 var funcs = map[string]functionClass{
 	// common functions
 	ast.Coalesce: &coalesceFunctionClass{baseFunctionClass{ast.Coalesce, 1, -1}},
diff --git a/expression/constant_propagation.go b/expression/constant_propagation.go
index ec69cf62ef05f..130417a6dee88 100644
--- a/expression/constant_propagation.go
+++ b/expression/constant_propagation.go
@@ -300,6 +300,9 @@ type propOuterJoinConstSolver struct {
 	filterConds []Expression
 	outerSchema *Schema
 	innerSchema *Schema
+	// nullSensitive indicates if this outer join is null sensitive; if true, we cannot generate
+	// additional `col is not null` conditions from column equal conditions.
+	nullSensitive bool
 }
 
 func (s *propOuterJoinConstSolver) setConds2ConstFalse(filterConds bool) {
@@ -461,7 +464,7 @@ func (s *propOuterJoinConstSolver) deriveConds(outerCol, innerCol *Column, schem
 // 'expression(..., outerCol, ...)' does not reference columns outside children schemas of join node.
 // Derived new expressions must be appended into join condition, not filter condition.
 func (s *propOuterJoinConstSolver) propagateColumnEQ() {
-	visited := make([]bool, len(s.joinConds)+len(s.filterConds))
+	visited := make([]bool, 2*len(s.joinConds)+len(s.filterConds))
 	s.unionSet = disjointset.NewIntSet(len(s.columns))
 	var outerCol, innerCol *Column
 	// Only consider column equal condition in joinConds.
@@ -473,6 +476,22 @@ func (s *propOuterJoinConstSolver) propagateColumnEQ() {
 			innerID := s.getColID(innerCol)
 			s.unionSet.Union(outerID, innerID)
 			visited[i] = true
+			// Generate `innerCol is not null` from `outerCol = innerCol`. Note that `outerCol is not null`
+			// does not hold since we are in outer join.
+			// For AntiLeftOuterSemiJoin, this does not work, for example:
+			// `select *, t1.a not in (select t2.b from t t2) from t t1` does not imply `t2.b is not null`.
+			// For LeftOuterSemiJoin, this does not work either, for example:
+			// `select *, t1.a in (select t2.b from t t2) from t t1`
+			// rows where t2.b is null would impact whether LeftOuterSemiJoin should output 0 or null if there
+			// is no row satisfying t2.b = t1.a.
+			if s.nullSensitive {
+				continue
+			}
+			childCol := s.innerSchema.RetrieveColumn(innerCol)
+			if !mysql.HasNotNullFlag(childCol.RetType.Flag) {
+				notNullExpr := BuildNotNullExpr(s.ctx, childCol)
+				s.joinConds = append(s.joinConds, notNullExpr)
+			}
 		}
 	}
 	lenJoinConds := len(s.joinConds)
@@ -538,10 +557,12 @@ func propagateConstantDNF(ctx sessionctx.Context, conds []Expression) []Expressi
 // Second step is to extract `outerCol = innerCol` from join conditions, and derive new join
 // conditions based on this column equal condition and `outerCol` related
 // expressions in join conditions and filter conditions;
-func PropConstOverOuterJoin(ctx sessionctx.Context, joinConds, filterConds []Expression, outerSchema, innerSchema *Schema) ([]Expression, []Expression) {
+func PropConstOverOuterJoin(ctx sessionctx.Context, joinConds, filterConds []Expression,
+	outerSchema, innerSchema *Schema, nullSensitive bool) ([]Expression, []Expression) {
 	solver := &propOuterJoinConstSolver{
-		outerSchema: outerSchema,
-		innerSchema: innerSchema,
+		outerSchema:   outerSchema,
+		innerSchema:   innerSchema,
+		nullSensitive: nullSensitive,
 	}
 	solver.colMapper = make(map[int64]int)
 	solver.ctx = ctx
diff --git a/expression/constant_propagation_test.go b/expression/constant_propagation_test.go
index a9d2b49ff9e19..f1727d9356587 100644
--- a/expression/constant_propagation_test.go
+++ b/expression/constant_propagation_test.go
@@ -87,7 +87,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 		"├─TableReader_8 10000.00 root data:TableScan_7",
 		"│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 3333.33 root data:Selection_10",
-		"  └─Selection_10 3333.33 cop gt(test.t2.a, 1)",
+		"  └─Selection_10 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))",
 		"    └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("explain select * from t1 left join t2 on t1.a = t2.a where t1.a > 1;").Check(testkit.Rows(
@@ -96,7 +96,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 		"│ └─Selection_9 3333.33 cop gt(test.t1.a, 1)",
 		"│   └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_13 3333.33 root data:Selection_12",
-		"  └─Selection_12 3333.33 cop gt(test.t2.a, 1)",
+		"  └─Selection_12 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))",
 		"    └─TableScan_11 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("explain select * from t1 right join t2 on t1.a > t2.a where t2.a = 1;").Check(testkit.Rows(
@@ -111,7 +111,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 	tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a where t2.a > 1;").Check(testkit.Rows(
 		"HashRightJoin_7 4166.67 root right outer join, inner:TableReader_10, equal:[eq(test.t1.a, test.t2.a)]",
 		"├─TableReader_10 3333.33 root data:Selection_9",
-		"│ └─Selection_9 3333.33 cop gt(test.t1.a, 1)",
+		"│ └─Selection_9 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))",
 		"│   └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_13 3333.33 root data:Selection_12",
 		"  └─Selection_12 3333.33 cop gt(test.t2.a, 1)",
@@ -120,7 +120,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 	tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a and t2.a > 1;").Check(testkit.Rows(
 		"HashRightJoin_6 10000.00 root right outer join, inner:TableReader_9, equal:[eq(test.t1.a, test.t2.a)], right cond:gt(test.t2.a, 1)",
 		"├─TableReader_9 3333.33 root data:Selection_8",
-		"│ └─Selection_8 3333.33 cop gt(test.t1.a, 1)",
+		"│ └─Selection_8 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))",
 		"│   └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 10000.00 root data:TableScan_10",
 		"  └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
@@ -139,7 +139,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 		"├─TableReader_8 10000.00 root data:TableScan_7",
 		"│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 3333.33 root data:Selection_10",
-		"  └─Selection_10 3333.33 cop gt(test.t2.a, 1)",
+		"  └─Selection_10 3333.33 cop gt(test.t2.a, 1), not(isnull(test.t2.a))",
 		"    └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("explain select * from t1 left join t2 on t1.a > t2.a and t2.a = 1;").Check(testkit.Rows(
@@ -147,13 +147,13 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 		"├─TableReader_8 10000.00 root data:TableScan_7",
 		"│ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 10.00 root data:Selection_10",
-		"  └─Selection_10 10.00 cop eq(test.t2.a, 1)",
+		"  └─Selection_10 10.00 cop eq(test.t2.a, 1), not(isnull(test.t2.a))",
 		"    └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 	tk.MustQuery("explain select * from t1 right join t2 on t1.a > t2.a and t1.a = 1;").Check(testkit.Rows(
 		"HashRightJoin_6 100000.00 root right outer join, inner:TableReader_9, other cond:gt(test.t1.a, test.t2.a)",
 		"├─TableReader_9 10.00 root data:Selection_8",
-		"│ └─Selection_8 10.00 cop eq(test.t1.a, 1)",
+		"│ └─Selection_8 10.00 cop eq(test.t1.a, 1), not(isnull(test.t1.a))",
 		"│   └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 10000.00 root data:TableScan_10",
 		"  └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
@@ -161,7 +161,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 	tk.MustQuery("explain select * from t1 right join t2 on t1.a = t2.a and t1.a > 1;").Check(testkit.Rows(
 		"HashRightJoin_6 10000.00 root right outer join, inner:TableReader_9, equal:[eq(test.t1.a, test.t2.a)]",
 		"├─TableReader_9 3333.33 root data:Selection_8",
-		"│ └─Selection_8 3333.33 cop gt(test.t1.a, 1)",
+		"│ └─Selection_8 3333.33 cop gt(test.t1.a, 1), not(isnull(test.t1.a))",
 		"│   └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_11 10000.00 root data:TableScan_10",
 		"  └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
@@ -197,7 +197,7 @@ func (s *testSuite) TestOuterJoinPropConst(c *C) {
 	tk.MustQuery("explain select * from t1 left join t2 on true where t1.a = 1 and t1.a = 1;").Check(testkit.Rows(
 		"HashLeftJoin_7 80000.00 root left outer join, inner:TableReader_12",
 		"├─TableReader_10 10.00 root data:Selection_9",
-		"│ └─Selection_9 10.00 cop eq(test.t1.a, 1), eq(test.t1.a, 1)",
+		"│ └─Selection_9 10.00 cop eq(test.t1.a, 1)",
 		"│   └─TableScan_8 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
 		"└─TableReader_12 10000.00 root data:TableScan_11",
 		"  └─TableScan_11 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
diff --git a/expression/function_traits.go b/expression/function_traits.go
index 24ffa391db6c0..c1c39b9309834 100644
--- a/expression/function_traits.go
+++ b/expression/function_traits.go
@@ -62,3 +62,44 @@ var DeferredFunctions = map[string]struct{}{
 var inequalFunctions = map[string]struct{}{
 	ast.IsNull: {},
 }
+
+// mutableEffectsFunctions stores functions which are mutable or have side effects, specifically,
+// we cannot remove them from filter even if they have duplicates.
+var mutableEffectsFunctions = map[string]struct{}{
+	// Time related functions in MySQL have various behaviors when executed multiple times in a single SQL,
+	// for example:
+	// mysql> select current_timestamp(), sleep(5), current_timestamp();
+	// +---------------------+----------+---------------------+
+	// | current_timestamp() | sleep(5) | current_timestamp() |
+	// +---------------------+----------+---------------------+
+	// | 2018-12-18 17:55:39 |        0 | 2018-12-18 17:55:39 |
+	// +---------------------+----------+---------------------+
+	// while:
+	// mysql> select sysdate(), sleep(5), sysdate();
+	// +---------------------+----------+---------------------+
+	// | sysdate()           | sleep(5) | sysdate()           |
+	// +---------------------+----------+---------------------+
+	// | 2018-12-18 17:57:38 |        0 | 2018-12-18 17:57:43 |
+	// +---------------------+----------+---------------------+
+	// for safety consideration, treat them all as mutable.
+	ast.Now:              {},
+	ast.CurrentTimestamp: {},
+	ast.UTCTime:          {},
+	ast.Curtime:          {},
+	ast.CurrentTime:      {},
+	ast.UTCTimestamp:     {},
+	ast.UnixTimestamp:    {},
+	ast.Sysdate:          {},
+	ast.Curdate:          {},
+	ast.CurrentDate:      {},
+	ast.UTCDate:          {},
+
+	ast.Rand:        {},
+	ast.RandomBytes: {},
+	ast.UUID:        {},
+	ast.UUIDShort:   {},
+	ast.Sleep:       {},
+	ast.SetVar:      {},
+	ast.GetVar:      {},
+	ast.AnyValue:    {},
+}
diff --git a/expression/util.go b/expression/util.go
index 66501b9e1cc0b..008d80598d2f0 100644
--- a/expression/util.go
+++ b/expression/util.go
@@ -587,3 +587,47 @@ func GetIntFromConstant(ctx sessionctx.Context, value Expression) (int, bool, er
 	}
 	return intNum, false, nil
 }
+
+// BuildNotNullExpr wraps up `not(isnull())` for given expression.
+func BuildNotNullExpr(ctx sessionctx.Context, expr Expression) Expression {
+	isNull := NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), expr)
+	notNull := NewFunctionInternal(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), isNull)
+	return notNull
+}
+
+// isMutableEffectsExpr checks if expr contains function which is mutable or has side effects.
+func isMutableEffectsExpr(expr Expression) bool {
+	switch x := expr.(type) {
+	case *ScalarFunction:
+		if _, ok := mutableEffectsFunctions[x.FuncName.L]; ok {
+			return true
+		}
+		for _, arg := range x.GetArgs() {
+			if isMutableEffectsExpr(arg) {
+				return true
+			}
+		}
+	case *Column:
+	case *Constant:
+		if x.DeferredExpr != nil {
+			return isMutableEffectsExpr(x.DeferredExpr)
+		}
+	}
+	return false
+}
+
+// RemoveDupExprs removes identical exprs. Note that if expr contains functions which
+// are mutable or have side effects, we cannot remove it even if it has duplicates.
+func RemoveDupExprs(ctx sessionctx.Context, exprs []Expression) []Expression {
+	res := make([]Expression, 0, len(exprs))
+	exists := make(map[string]struct{}, len(exprs))
+	sc := ctx.GetSessionVars().StmtCtx
+	for _, expr := range exprs {
+		key := string(expr.HashCode(sc))
+		if _, ok := exists[key]; !ok || isMutableEffectsExpr(expr) {
+			res = append(res, expr)
+			exists[key] = struct{}{}
+		}
+	}
+	return res
+}
diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go
index 5cf32c9b54a12..26c51b02264b0 100644
--- a/planner/core/cbo_test.go
+++ b/planner/core/cbo_test.go
@@ -112,11 +112,13 @@ func (s *testAnalyzeSuite) TestCBOWithoutAnalyze(c *C) {
 	h.DumpStatsDeltaToKV(statistics.DumpAll)
 	c.Assert(h.Update(dom.InfoSchema()), IsNil)
 	testKit.MustQuery("explain select * from t1, t2 where t1.a = t2.a").Check(testkit.Rows(
-		"HashLeftJoin_8 7.50 root inner join, inner:TableReader_13, equal:[eq(test.t1.a, test.t2.a)]",
-		"├─TableReader_11 6.00 root data:TableScan_10",
-		"│ └─TableScan_10 6.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"└─TableReader_13 6.00 root data:TableScan_12",
-		"  └─TableScan_12 6.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"HashLeftJoin_8 7.49 root inner join, inner:TableReader_15, equal:[eq(test.t1.a, test.t2.a)]",
+		"├─TableReader_12 5.99 root data:Selection_11",
+		"│ └─Selection_11 5.99 cop not(isnull(test.t1.a))",
+		"│   └─TableScan_10 6.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"└─TableReader_15 5.99 root data:Selection_14",
+		"  └─Selection_14 5.99 cop not(isnull(test.t2.a))",
+		"    └─TableScan_13 6.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 }
 
@@ -165,17 +167,19 @@ func (s *testAnalyzeSuite) TestStraightJoin(c *C) {
 	))
 
 	testKit.MustQuery("explain select straight_join * from t1, t2, t3, t4 where t1.a=t4.a;").Check(testkit.Rows(
-		"HashLeftJoin_11 1250000000000.00 root inner join, inner:TableReader_24, equal:[eq(test.t1.a, test.t4.a)]",
-		"├─HashLeftJoin_13 1000000000000.00 root inner join, inner:TableReader_22",
-		"│ ├─HashLeftJoin_15 100000000.00 root inner join, inner:TableReader_20",
-		"│ │ ├─TableReader_18 10000.00 root data:TableScan_17",
-		"│ │ │ └─TableScan_17 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"│ │ └─TableReader_20 10000.00 root data:TableScan_19",
-		"│ │   └─TableScan_19 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"│ └─TableReader_22 10000.00 root data:TableScan_21",
-		"│   └─TableScan_21 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo",
-		"└─TableReader_24 10000.00 root data:TableScan_23",
-		"  └─TableScan_23 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"HashLeftJoin_11 1248750000000.00 root inner join, inner:TableReader_26, equal:[eq(test.t1.a, test.t4.a)]",
+		"├─HashLeftJoin_13 999000000000.00 root inner join, inner:TableReader_23",
+		"│ ├─HashRightJoin_16 99900000.00 root inner join, inner:TableReader_19",
+		"│ │ ├─TableReader_19 9990.00 root data:Selection_18",
+		"│ │ │ └─Selection_18 9990.00 cop not(isnull(test.t1.a))",
+		"│ │ │   └─TableScan_17 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"│ │ └─TableReader_21 10000.00 root data:TableScan_20",
+		"│ │   └─TableScan_20 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"│ └─TableReader_23 10000.00 root data:TableScan_22",
+		"│   └─TableScan_22 10000.00 cop table:t3, range:[-inf,+inf], keep order:false, stats:pseudo",
+		"└─TableReader_26 9990.00 root data:Selection_25",
+		"  └─Selection_25 9990.00 cop not(isnull(test.t4.a))",
+		"    └─TableScan_24 10000.00 cop table:t4, range:[-inf,+inf], keep order:false, stats:pseudo",
 	))
 }
 
@@ -409,11 +413,11 @@ func (s *testAnalyzeSuite) TestEmptyTable(c *C) {
 		},
 		{
 			sql:  "select * from t where c1 in (select c1 from t1)",
-			best: "RightHashJoin{TableReader(Table(t1)->HashAgg)->HashAgg->TableReader(Table(t))}(test.t1.c1,test.t.c1)->Projection",
+			best: "RightHashJoin{TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))])->HashAgg)->HashAgg->TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))}(test.t1.c1,test.t.c1)->Projection",
 		},
 		{
 			sql:  "select * from t, t1 where t.c1 = t1.c1",
-			best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1))}(test.t.c1,test.t1.c1)",
+			best: "LeftHashJoin{TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))->TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))]))}(test.t.c1,test.t1.c1)",
 		},
 		{
 			sql:  "select * from t limit 0",
@@ -690,10 +694,10 @@ func (s *testAnalyzeSuite) TestCorrelatedEstimation(c *C) {
 			"  └─StreamAgg_20 1.00 root funcs:count(1)",
 			"    └─HashLeftJoin_21 1.00 root inner join, inner:TableReader_28, equal:[eq(s.a, t1.a)]",
 			"      ├─TableReader_25 1.00 root data:Selection_24",
-			"      │ └─Selection_24 1.00 cop eq(s.a, test.t.a)",
+			"      │ └─Selection_24 1.00 cop eq(s.a, test.t.a), not(isnull(s.a))",
 			"      │   └─TableScan_23 10.00 cop table:s, range:[-inf,+inf], keep order:false",
 			"      └─TableReader_28 1.00 root data:Selection_27",
-			"        └─Selection_27 1.00 cop eq(t1.a, test.t.a)",
+			"        └─Selection_27 1.00 cop eq(t1.a, test.t.a), not(isnull(t1.a))",
 			"          └─TableScan_26 10.00 cop table:t1, range:[-inf,+inf], keep order:false",
 		))
 	tk.MustQuery("explain select (select concat(t1.a, \",\", t1.b) from t t1 where t1.a=t.a and t1.c=t.c) from t").
diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go
index 2a4af05983ace..538a9a266bbcd 100644
--- a/planner/core/expression_rewriter.go
+++ b/planner/core/expression_rewriter.go
@@ -648,17 +648,11 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node,
 		for _, col := range agg.schema.Columns {
 			col.IsReferenced = true
 		}
-		eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg, false, false)
 		// Build inner join above the aggregation.
-		join := LogicalJoin{
-			JoinType:        InnerJoin,
-			EqualConditions: eq,
-			LeftConditions:  left,
-			RightConditions: right,
-			OtherConditions: other,
-		}.Init(er.ctx)
+		join := LogicalJoin{JoinType: InnerJoin}.Init(er.ctx)
 		join.SetChildren(er.p, agg)
 		join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema))
+		join.attachOnConds(expression.SplitCNFItems(checkCondition))
 		// Set join hint for this join.
 		if er.b.TableHints() != nil {
 			er.err = join.setPreferredJoinType(er.b.TableHints())
diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 72aa76dddb3e4..be68b36b92daf 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -180,21 +180,45 @@ func (b *PlanBuilder) buildResultSetNode(node ast.ResultSetNode) (p LogicalPlan,
 // extractOnCondition divide conditions in CNF of join node into 4 groups.
 // These conditions can be where conditions, join conditions, or collection of both.
 // If deriveLeft/deriveRight is set, we would try to derive more conditions for left/right plan.
-func extractOnCondition(conditions []expression.Expression, left LogicalPlan, right LogicalPlan,
-	deriveLeft bool, deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
+func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, deriveLeft bool,
+	deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
 	rightCond []expression.Expression, otherCond []expression.Expression) {
+	left, right := p.children[0], p.children[1]
 	for _, expr := range conditions {
 		binop, ok := expr.(*expression.ScalarFunction)
-		if ok && binop.FuncName.L == ast.EQ {
-			ln, lOK := binop.GetArgs()[0].(*expression.Column)
-			rn, rOK := binop.GetArgs()[1].(*expression.Column)
+		if ok && len(binop.GetArgs()) == 2 {
+			ctx := binop.GetCtx()
+			arg0, lOK := binop.GetArgs()[0].(*expression.Column)
+			arg1, rOK := binop.GetArgs()[1].(*expression.Column)
 			if lOK && rOK {
-				if left.Schema().Contains(ln) && right.Schema().Contains(rn) {
-					eqCond = append(eqCond, binop)
-					continue
+				var leftCol, rightCol *expression.Column
+				if left.Schema().Contains(arg0) && right.Schema().Contains(arg1) {
+					leftCol, rightCol = arg0, arg1
+				}
+				if leftCol == nil && left.Schema().Contains(arg1) && right.Schema().Contains(arg0) {
+					leftCol, rightCol = arg1, arg0
+				}
+				if leftCol != nil {
+					// Do not derive `is not null` for anti join, since it may result in wrong results.
+					// For example:
+					// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
+					// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b)` does not imply `t1.b is not null`,
+					// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`.
+					if deriveLeft && p.JoinType != AntiSemiJoin && p.JoinType != AntiLeftOuterSemiJoin {
+						if isNullRejected(ctx, left.Schema(), expr) && !mysql.HasNotNullFlag(leftCol.RetType.Flag) {
+							notNullExpr := expression.BuildNotNullExpr(ctx, leftCol)
+							leftCond = append(leftCond, notNullExpr)
+						}
+					}
+					if deriveRight && p.JoinType != AntiSemiJoin && p.JoinType != AntiLeftOuterSemiJoin {
+						if isNullRejected(ctx, right.Schema(), expr) && !mysql.HasNotNullFlag(rightCol.RetType.Flag) {
+							notNullExpr := expression.BuildNotNullExpr(ctx, rightCol)
+							rightCond = append(rightCond, notNullExpr)
+						}
+					}
 				}
-				if left.Schema().Contains(rn) && right.Schema().Contains(ln) {
-					cond := expression.NewFunctionInternal(binop.GetCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), rn, ln)
+				if leftCol != nil && binop.FuncName.L == ast.EQ {
+					cond := expression.NewFunctionInternal(ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), leftCol, rightCol)
 					eqCond = append(eqCond, cond.(*expression.ScalarFunction))
 					continue
 				}
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index 1ea9f82e35dd2..1b87cbd8f2422 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -285,6 +285,12 @@ func (s *testPlanSuite) TestJoinPredicatePushDown(c *C) {
 			left:  "[]",
 			right: "[or(or(eq(t2.a, 3), eq(t2.a, 4)), eq(t2.a, 2))]",
 		},
+		// Duplicate condition would be removed.
+		{
+			sql:   "select * from t t1 join t t2 on t1.a > 1 and t1.a > 1",
+			left:  "[gt(t1.a, 1)]",
+			right: "[]",
+		},
 	}
 	for _, ca := range tests {
 		comment := Commentf("for %s", ca.sql)
@@ -449,6 +455,134 @@ func (s *testPlanSuite) TestAntiSemiJoinConstFalse(c *C) {
 	}
 }
 
+// TestDeriveNotNullConds checks, for joins on `col op col`, which `not(isnull(col))` conditions
+// end up in the pushed-down conditions of the left and right DataSource after predicate push down.
+func (s *testPlanSuite) TestDeriveNotNullConds(c *C) {
+	defer testleak.AfterTest(c)()
+	cases := []struct {
+		sql, plan, left, right string
+	}{
+		{
+			sql:   "select * from t t1 inner join t t2 on t1.e = t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection",
+			left:  "[not(isnull(t1.e))]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 inner join t t2 on t1.e > t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[not(isnull(t1.e))]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 inner join t t2 on t1.e = t2.e and t1.e is not null",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection",
+			left:  "[not(isnull(t1.e))]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.e = t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection",
+			left:  "[]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.e > t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.e = t2.e and t2.e is not null",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection",
+			left:  "[]",
+			right: "[not(isnull(t2.e))]",
+		},
+		{
+			sql:   "select * from t t1 right join t t2 on t1.e = t2.e and t1.e is not null",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.e,t2.e)->Projection",
+			left:  "[not(isnull(t1.e))]",
+			right: "[]",
+		},
+		{
+			sql:   "select * from t t1 inner join t t2 on t1.e <=> t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.e <=> t2.e",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+		// Not deriving if column has NotNull flag already.
+		{
+			sql:   "select * from t t1 inner join t t2 on t1.b = t2.b",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.b = t2.b",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+		{
+			sql:   "select * from t t1 left join t t2 on t1.b > t2.b",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+		// Not deriving for AntiSemiJoin
+		{
+			sql:   "select * from t t1 where not exists (select * from t t2 where t2.e = t1.e)",
+			plan:  "Join{DataScan(t1)->DataScan(t2)}->Projection",
+			left:  "[]",
+			right: "[]",
+		},
+	}
+	for _, tt := range cases {
+		comment := Commentf("for %s", tt.sql)
+		stmt, err := s.ParseOneStmt(tt.sql, "", "")
+		c.Assert(err, IsNil, comment)
+		lp, err := BuildLogicalPlan(s.ctx, stmt, s.is)
+		c.Assert(err, IsNil, comment)
+		lp, err = logicalOptimize(flagPredicatePushDown|flagPrunColumns, lp.(LogicalPlan))
+		c.Assert(err, IsNil, comment)
+		c.Assert(ToString(lp), Equals, tt.plan, comment)
+		// The join is the first child of the plan root, with a DataSource on each side.
+		join := lp.(LogicalPlan).Children()[0].(*LogicalJoin)
+		lhs := join.Children()[0].(*DataSource)
+		rhs := join.Children()[1].(*DataSource)
+		// Compare the string form of the conditions pushed down to each side.
+		gotLeft, gotRight := fmt.Sprintf("%s", lhs.pushedDownConds), fmt.Sprintf("%s", rhs.pushedDownConds)
+		c.Assert(gotLeft, Equals, tt.left, comment)
+		c.Assert(gotRight, Equals, tt.right, comment)
+	}
+}
+
+// TestDupRandJoinCondsPushDown checks that duplicate join conds with a mutable function are not de-duplicated on push down.
+func (s *testPlanSuite) TestDupRandJoinCondsPushDown(c *C) {
+	defer testleak.AfterTest(c)()
+	sql := "select * from t as t1 join t t2 on t1.a > rand() and t1.a > rand()"
+	comment := Commentf("for %s", sql)
+	stmt, err := s.ParseOneStmt(sql, "", "")
+	c.Assert(err, IsNil, comment)
+	p, err := BuildLogicalPlan(s.ctx, stmt, s.is)
+	c.Assert(err, IsNil, comment)
+	p, err = logicalOptimize(flagPredicatePushDown, p.(LogicalPlan))
+	c.Assert(err, IsNil, comment)
+	proj, ok := p.(*LogicalProjection)
+	c.Assert(ok, IsTrue, comment)
+	join, ok := proj.children[0].(*LogicalJoin)
+	c.Assert(ok, IsTrue, comment)
+	leftPlan, ok := join.children[0].(*LogicalSelection)
+	c.Assert(ok, IsTrue, comment)
+	c.Assert(fmt.Sprintf("%s", leftPlan.Conditions), Equals, "[gt(cast(t1.a), rand()) gt(cast(t1.a), rand())]", comment)
+}
+
 func (s *testPlanSuite) TestTablePartition(c *C) {
 	defer testleak.AfterTest(c)()
 	definitions := []model.PartitionDefinition{
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index aa2cc510b67c9..af5d2c5fcefc7 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -146,7 +146,7 @@ func (p *LogicalJoin) columnSubstitute(schema *expression.Schema, exprs []expres
 }
 
 func (p *LogicalJoin) attachOnConds(onConds []expression.Expression) {
-	eq, left, right, other := extractOnCondition(onConds, p.children[0].(LogicalPlan), p.children[1].(LogicalPlan), false, false)
+	eq, left, right, other := p.extractOnCondition(onConds, false, false)
 	p.EqualConditions = append(eq, p.EqualConditions...)
 	p.LeftConditions = append(left, p.LeftConditions...)
 	p.RightConditions = append(right, p.RightConditions...)
diff --git a/planner/core/rule_predicate_push_down.go b/planner/core/rule_predicate_push_down.go
index bf352289da908..35e4538b63fe7 100644
--- a/planner/core/rule_predicate_push_down.go
+++ b/planner/core/rule_predicate_push_down.go
@@ -111,8 +111,6 @@ func (p *LogicalTableDual) PredicatePushDown(predicates []expression.Expression)
 // PredicatePushDown implements LogicalPlan PredicatePushDown interface.
 func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan) {
 	simplifyOuterJoin(p, predicates)
-	leftPlan := p.children[0]
-	rightPlan := p.children[1]
 	var equalCond []*expression.ScalarFunction
 	var leftPushCond, rightPushCond, otherCond, leftCond, rightCond []expression.Expression
 	switch p.JoinType {
@@ -125,7 +123,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
 		// Handle where conditions
 		predicates = expression.ExtractFiltersFromDNFs(p.ctx, predicates)
 		// Only derive left where condition, because right where condition cannot be pushed down
-		equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(predicates, leftPlan, rightPlan, true, false)
+		equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, true, false)
 		leftCond = leftPushCond
 		// Handle join conditions, only derive right join condition, because left join condition cannot be pushed down
 		_, derivedRightJoinCond := deriveOtherConditions(p, false, true)
@@ -142,7 +140,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
 		// Handle where conditions
 		predicates = expression.ExtractFiltersFromDNFs(p.ctx, predicates)
 		// Only derive right where condition, because left where condition cannot be pushed down
-		equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(predicates, leftPlan, rightPlan, false, true)
+		equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, false, true)
 		rightCond = rightPushCond
 		// Handle join conditions, only derive left join condition, because right join condition cannot be pushed down
 		derivedLeftJoinCond, _ := deriveOtherConditions(p, true, false)
@@ -167,7 +165,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
 				return ret, dual
 			}
 		}
-		equalCond, leftPushCond, rightPushCond, otherCond = extractOnCondition(tempCond, leftPlan, rightPlan, true, true)
+		equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true)
 		p.LeftConditions = nil
 		p.RightConditions = nil
 		p.EqualConditions = equalCond
@@ -175,8 +173,10 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
 		leftCond = leftPushCond
 		rightCond = rightPushCond
 	}
-	leftRet, lCh := leftPlan.PredicatePushDown(leftCond)
-	rightRet, rCh := rightPlan.PredicatePushDown(rightCond)
+	leftCond = expression.RemoveDupExprs(p.ctx, leftCond)
+	rightCond = expression.RemoveDupExprs(p.ctx, rightCond)
+	leftRet, lCh := p.children[0].PredicatePushDown(leftCond)
+	rightRet, rCh := p.children[1].PredicatePushDown(rightCond)
 	addSelection(p, lCh, leftRet, 0)
 	addSelection(p, rCh, rightRet, 1)
 	p.updateEQCond()
@@ -413,17 +413,50 @@ func deriveOtherConditions(p *LogicalJoin, deriveLeft bool, deriveRight bool) (l
 			if leftRelaxedCond != nil {
 				leftCond = append(leftCond, leftRelaxedCond)
 			}
+			notNullExpr := deriveNotNullExpr(expr, leftPlan.Schema())
+			if notNullExpr != nil {
+				leftCond = append(leftCond, notNullExpr)
+			}
 		}
 		if deriveRight {
 			rightRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(expr, rightPlan.Schema())
 			if rightRelaxedCond != nil {
 				rightCond = append(rightCond, rightRelaxedCond)
 			}
+			notNullExpr := deriveNotNullExpr(expr, rightPlan.Schema())
+			if notNullExpr != nil {
+				rightCond = append(rightCond, notNullExpr)
+			}
 		}
 	}
 	return
 }
 
+// deriveNotNullExpr generates a new expression `not(isnull(col))` given `col1 op col2`, in which `col` is
+// the operand found in the specified schema. Caller should guarantee that only one of `col1` or `col2` is
+// in schema. It returns nil if expr is not a function of two columns, neither column is in schema, expr is
+// not null rejected, or `col` has the NotNull flag. This is only called for `OtherConditions` of outer join
+// now, so it is safe even if join type is LeftOuterSemiJoin or AntiLeftOuterSemiJoin.
+func deriveNotNullExpr(expr expression.Expression, schema *expression.Schema) expression.Expression {
+	binop, ok := expr.(*expression.ScalarFunction)
+	if !ok || len(binop.GetArgs()) != 2 {
+		return nil
+	}
+	arg0, lOK := binop.GetArgs()[0].(*expression.Column)
+	arg1, rOK := binop.GetArgs()[1].(*expression.Column)
+	if !lOK || !rOK {
+		return nil
+	}
+	childCol := schema.RetrieveColumn(arg0)
+	if childCol == nil {
+		childCol = schema.RetrieveColumn(arg1)
+	}
+	if childCol != nil && isNullRejected(binop.GetCtx(), schema, expr) && !mysql.HasNotNullFlag(childCol.RetType.Flag) {
+		return expression.BuildNotNullExpr(binop.GetCtx(), childCol)
+	}
+	return nil
+}
+
 // conds2TableDual builds a LogicalTableDual if cond is constant false or null.
 func conds2TableDual(p LogicalPlan, conds []expression.Expression) LogicalPlan {
 	if len(conds) != 1 {
@@ -461,7 +494,8 @@ func (p *LogicalJoin) outerJoinPropConst(predicates []expression.Expression) []e
 	p.LeftConditions = nil
 	p.RightConditions = nil
 	p.OtherConditions = nil
-	joinConds, predicates = expression.PropConstOverOuterJoin(p.ctx, joinConds, predicates, outerTable.Schema(), innerTable.Schema())
+	nullSensitive := (p.JoinType == AntiLeftOuterSemiJoin || p.JoinType == LeftOuterSemiJoin)
+	joinConds, predicates = expression.PropConstOverOuterJoin(p.ctx, joinConds, predicates, outerTable.Schema(), innerTable.Schema(), nullSensitive)
 	p.attachOnConds(joinConds)
 	return predicates
 }