Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, executor: implement the null-aware antiSemiJoin and null-aware antiLeftOuterSemiJoin (hash join with inner build) #37512

Merged
merged 24 commits into from
Sep 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
412 changes: 412 additions & 0 deletions cmd/explaintest/r/naaj.result

Large diffs are not rendered by default.

213 changes: 213 additions & 0 deletions cmd/explaintest/t/naaj.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# naaj.test covers the null-aware anti join (NAAJ): NOT IN / != ALL subqueries
# whose join keys may contain NULL on either side.
use test;
# enable the null-aware anti join for this session; it is switched off again at the end of the file.
set @@session.tidb_enable_null_aware_anti_join=1;
# case 1: assert the cases where the left side has no null values.
select "***************************************************** PART 1 *****************************************************************" as name;
drop table if exists naaj_A, naaj_B;
create table naaj_A(a int, b int, c int);
create table naaj_B(a int, b int, c int);
insert into naaj_A values (1,1,1);
insert into naaj_B values (1,2,2);

# assert 1: both sides don't have null values.
# AntiLeftOuterSemiJoin: the NOT IN predicate appears in the select list.
explain format = 'brief' select (a, b) not in (select a, b from naaj_B) from naaj_A;
select (a, b) not in (select a, b from naaj_B) from naaj_A;

# AntiSemiJoin: the NOT IN predicate appears in the WHERE clause.
explain format = 'brief' select * from naaj_A where (a, b) not in (select a, b from naaj_B);
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 2: the right side has a row in the same key bucket, i.e. (1,1) equals the left join key.
insert into naaj_B values(1,1,1);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 3: the right side has null values.
insert into naaj_B values(1, null, 2);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 4: the right side has null values, but they can't pass the inner filter
# (whether or not the filter is related to the join key).
# Filter not related to the join key: naaj_A.c is 1 and every naaj_B.c is 1 or 2, so no right row passes.
explain format = 'brief' select (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
select (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;

explain format = 'brief' select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c);
select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c);

# Filter related to the join key: naaj_A.a and every naaj_B.a are 1, so no right row passes.
explain format = 'brief' select (a, b) not in (select a, b from naaj_B where naaj_A.a != naaj_B.a) from naaj_A;
select (a, b) not in (select a, b from naaj_B where naaj_A.a != naaj_B.a) from naaj_A;

explain format = 'brief' select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.a != naaj_B.a);
select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.a != naaj_B.a);

# assert 5: the right side is empty.
select * from naaj_A where (a, b) not in (select a, b from naaj_B where false);
select (a, b) not in (select a, b from naaj_B where false) from naaj_A;

# assert 6: right side null bucket filter (the not-null join keys should match each other).
insert into naaj_B values(2, null, 2);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# remove the exact-match row (1,1,1) and check again.
delete from naaj_B where a=1 and b=1 and c=1;
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# case 2: assert the cases where the left side has a null value.
select "***************************************************** PART 2 *****************************************************************" as name;
delete from naaj_A;
delete from naaj_B;
insert into naaj_A values(1,null,1);

# assert 1: left side has null, while the right is empty.
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 2: left side has null, while the right has an invalid null row (it can't pass the nullBit filter).
insert into naaj_B values(2, null, 2);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 2 (continued): left side has null, while the right has a valid null row (it passes the nullBit filter).
insert into naaj_B values(null, null, 2);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);

# assert 3: left side has null, while the right has a valid non-null row.
delete from naaj_B;
insert into naaj_B values(2, 2, 2);
select (a, b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) not in (select a, b from naaj_B);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have cases where naaj_A has a (null, null, null) tuple?


# assert 4: left side has null, while the right has no valid rows.
# naaj_A.c is 1 and every naaj_B.c is 2, so the correlated filter naaj_A.c > naaj_B.c rejects all right rows.
# NOTE(review): the original comment ended with "(equivalent to )"; presumably "equivalent to an empty right side" was meant - confirm.
insert into naaj_B values(2, null, 2);
insert into naaj_B values(null, null, 2);
explain format = 'brief' select (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
select (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
explain format = 'brief' select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c);
select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c > naaj_B.c);

# assert 5: when the inner subquery has a correlated EQ condition, we won't build the NA-EQ connecting condition here.
explain format = 'brief' select (a, b) not in (select a, b from naaj_B where naaj_A.c = naaj_B.c) from naaj_A;
select (a, b) not in (select a, b from naaj_B where naaj_A.c = naaj_B.c) from naaj_A;
explain format = 'brief' select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c = naaj_B.c);
select * from naaj_A where (a, b) not in (select a, b from naaj_B where naaj_A.c = naaj_B.c);

# case 3: assert the cases for the semantically equivalent predicate != ALL, where the left side has no null values.
# The statements mirror PART 1 with "not in" replaced by "!= all".
select "***************************************************** PART 3 *****************************************************************" as name;
drop table if exists naaj_A, naaj_B;
create table naaj_A(a int, b int, c int);
create table naaj_B(a int, b int, c int);
insert into naaj_A values (1,1,1);
insert into naaj_B values (1,2,2);

# assert 1: both sides don't have null values.
# AntiLeftOuterSemiJoin: the != ALL predicate appears in the select list.
explain format = 'brief' select (a, b) != all (select a, b from naaj_B) from naaj_A;
select (a, b) != all (select a, b from naaj_B) from naaj_A;

# AntiSemiJoin: the != ALL predicate appears in the WHERE clause.
explain format = 'brief' select * from naaj_A where (a, b) != all (select a, b from naaj_B);
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 2: the right side has a row in the same key bucket, i.e. (1,1) equals the left join key.
insert into naaj_B values(1,1,1);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 3: the right side has null values.
insert into naaj_B values(1, null, 2);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 4: the right side has null values, but they can't pass the inner filter
# (whether or not the filter is related to the join key).
# Filter not related to the join key: naaj_A.c is 1 and every naaj_B.c is 1 or 2, so no right row passes.
explain format = 'brief' select (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
select (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;

explain format = 'brief' select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c);
select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c);

# Filter related to the join key: naaj_A.a and every naaj_B.a are 1, so no right row passes.
explain format = 'brief' select (a, b) != all (select a, b from naaj_B where naaj_A.a != naaj_B.a) from naaj_A;
select (a, b) != all (select a, b from naaj_B where naaj_A.a != naaj_B.a) from naaj_A;

explain format = 'brief' select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.a != naaj_B.a);
select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.a != naaj_B.a);

# assert 5: the right side is empty.
select * from naaj_A where (a, b) != all (select a, b from naaj_B where false);
select (a, b) != all (select a, b from naaj_B where false) from naaj_A;

# assert 6: right side null bucket filter (the not-null join keys should match each other).
insert into naaj_B values(2, null, 2);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# remove the exact-match row (1,1,1) and check again.
delete from naaj_B where a=1 and b=1 and c=1;
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# case 4: assert the cases for the semantically equivalent predicate != ALL, where the left side has a null value.
# The statements mirror PART 2 with "not in" replaced by "!= all".
select "***************************************************** PART 4 *****************************************************************" as name;
delete from naaj_A;
delete from naaj_B;
insert into naaj_A values(1,null,1);

# assert 1: left side has null, while the right is empty.
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 2: left side has null, while the right has an invalid null row (it can't pass the nullBit filter).
insert into naaj_B values(2, null, 2);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 2 (continued): left side has null, while the right has a valid null row (it passes the nullBit filter).
insert into naaj_B values(null, null, 2);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 3: left side has null, while the right has a valid non-null row.
delete from naaj_B;
insert into naaj_B values(2, 2, 2);
select (a, b) != all (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a, b) != all (select a, b from naaj_B);

# assert 4: left side has null, while the right has no valid rows.
# naaj_A.c is 1 and every naaj_B.c is 2, so the correlated filter naaj_A.c > naaj_B.c rejects all right rows.
# NOTE(review): the original comment ended with "(equivalent to )"; presumably "equivalent to an empty right side" was meant - confirm.
insert into naaj_B values(2, null, 2);
insert into naaj_B values(null, null, 2);
explain format = 'brief' select (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
select (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c) from naaj_A;
explain format = 'brief' select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c);
select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c > naaj_B.c);

# assert 5: when the inner subquery has a correlated EQ condition, we won't build the NA-EQ connecting condition here.
explain format = 'brief' select (a, b) != all (select a, b from naaj_B where naaj_A.c = naaj_B.c) from naaj_A;
select (a, b) != all (select a, b from naaj_B where naaj_A.c = naaj_B.c) from naaj_A;
explain format = 'brief' select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c = naaj_B.c);
select * from naaj_A where (a, b) != all (select a, b from naaj_B where naaj_A.c = naaj_B.c);

# case 5: checks for specific bugs.
select "***************************************************** PART 5 *****************************************************************" as name;
delete from naaj_A;
delete from naaj_B;
insert into naaj_A values(1,1,1);
insert into naaj_B values(2,null,2);

# assert 1: although the probe key doesn't have null values, we still need to use buildNullBits
# to guarantee that the non-null positions have exactly the same values.
select (a,b) not in (select a, b from naaj_B) from naaj_A;
select * from naaj_A where (a,b) not in (select a, b from naaj_B);

# assert 2: a projection should be injected under the join when the join keys are expressions.
# With naaj_A = (1,1,1) the left key (a+1, b*2) evaluates to (2,2).
explain select (a+1,b*2) not in (select a, b from naaj_B) from naaj_A;
select (a+1,b*2) not in (select a, b from naaj_B) from naaj_A;
# add an exact match (2,2) for the computed left key and check again.
insert into naaj_B values(2,2,2);
select (a+1,b*2) not in (select a, b from naaj_B) from naaj_A;

# NOTE(review): the explain below uses the subquery "select a+1, b-1 from naaj_B" while the
# executed select uses "select a, b from naaj_B", so the plan shown is not the plan of the query
# that runs - confirm whether this mismatch is intentional.
explain select * from naaj_A where (a+1,b*2) not in (select a+1, b-1 from naaj_B);
select * from naaj_A where (a+1,b*2) not in (select a, b from naaj_B);

# assert 3: NA-EQ and EQ can't co-exist at the same time (only the plans are checked here).
explain select (a+1,b*2) not in (select a, b=1 from naaj_B where naaj_A.a = naaj_B.a) from naaj_A;
explain select * from naaj_A where (a+1,b*2) not in (select a, b=1 from naaj_B where naaj_A.a = naaj_B.a);
# switch the null-aware anti join off again at the end of the test.
set @@session.tidb_enable_null_aware_anti_join=0;
7 changes: 4 additions & 3 deletions executor/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ func prepare4HashJoin(testCase *hashJoinTestCase, innerExec, outerExec Executor)
e.joiners = make([]joiner, e.concurrency)
for i := uint(0); i < e.concurrency; i++ {
e.joiners[i] = newJoiner(testCase.ctx, e.joinType, true, defaultValues,
nil, lhsTypes, rhsTypes, childrenUsedSchema)
nil, lhsTypes, rhsTypes, childrenUsedSchema, false)
}
memLimit := int64(-1)
if testCase.disk {
Expand Down Expand Up @@ -1335,7 +1335,7 @@ func prepare4IndexInnerHashJoin(tc *indexJoinTestCase, outerDS *mockDataSource,
hashCols: tc.innerHashKeyIdx,
},
workerWg: new(sync.WaitGroup),
joiner: newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil),
joiner: newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil, false),
isOuterJoin: false,
keyOff2IdxOff: keyOff2IdxOff,
lastColHelper: nil,
Expand Down Expand Up @@ -1419,7 +1419,7 @@ func prepare4IndexMergeJoin(tc *indexJoinTestCase, outerDS *mockDataSource, inne
concurrency := e.ctx.GetSessionVars().IndexLookupJoinConcurrency()
joiners := make([]joiner, concurrency)
for i := 0; i < concurrency; i++ {
joiners[i] = newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil)
joiners[i] = newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil, false)
}
e.joiners = joiners
return e, nil
Expand Down Expand Up @@ -1538,6 +1538,7 @@ func prepareMergeJoinExec(tc *mergeJoinTestCase, joinSchema *expression.Schema,
retTypes(leftExec),
retTypes(rightExec),
tc.childrenUsedSchema,
false,
)

mergeJoinExec.innerTable = &mergeJoinTable{
Expand Down
Loading