Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exec: fix output batches of LEFT SEMI for hash and merge joiners #39294

Merged
merged 1 commit into from
Aug 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pkg/sql/exec/hashjoiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,10 +745,21 @@ func makeHashJoinProber(
var outColTypes []coltypes.T
var buildColOffset, probeColOffset uint32
if buildRightSide {
outColTypes = append(probe.sourceTypes, build.sourceTypes...)
if len(build.outCols) == 0 {
// LEFT SEMI and LEFT ANTI joins emit no columns from the right side, so the
// output batch must not contain them; outColTypes therefore holds only the
// types from the left side.
outColTypes = probe.sourceTypes
} else {
outColTypes = append(probe.sourceTypes, build.sourceTypes...)
}
buildColOffset = uint32(len(probe.sourceTypes))
probeColOffset = 0
} else {
// Note that we don't need to check whether probe.outCols is non-empty
// before populating outColTypes because LEFT SEMI and LEFT ANTI joins will
// always build the right side.
outColTypes = append(build.sourceTypes, probe.sourceTypes...)
buildColOffset = 0
probeColOffset = nBuildCols
Expand Down
10 changes: 9 additions & 1 deletion pkg/sql/exec/mergejoiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,15 @@ func (o *mergeJoinBase) Init() {
func (o *mergeJoinBase) initWithBatchSize(outBatchSize uint16) {
outColTypes := make([]coltypes.T, len(o.left.sourceTypes)+len(o.right.sourceTypes))
copy(outColTypes, o.left.sourceTypes)
copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes)
if len(o.right.outCols) == 0 {
// LEFT SEMI and LEFT ANTI joins emit no columns from the right input, so the
// output batch must not contain them; outColTypes therefore holds only the
// types from the left input.
outColTypes = outColTypes[:len(o.left.sourceTypes)]
} else {
copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes)
}

o.output = coldata.NewMemBatchWithSize(outColTypes, int(outBatchSize))
o.left.source.Init()
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# LogicTest: 5node-dist-vec

statement ok
CREATE TABLE t (k INT, v INT)

statement ok
INSERT INTO t VALUES (1, 10), (2, 20), (3, 30)

statement ok
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[3], 1)

statement ok
CREATE TABLE xy (x INT PRIMARY KEY, y INT)

statement ok
INSERT INTO xy VALUES (2, 200), (3, 300), (4, 400)

statement ok
ALTER TABLE t SPLIT AT VALUES (3), (4)

statement ok
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 4)

# Test that the LEFT SEMI hash join outputs batches that contain only the
# columns from the left side.
query II rowsort
SELECT * FROM t WHERE EXISTS(SELECT * FROM xy WHERE x=t.k)
----
2 20
3 30
41 changes: 25 additions & 16 deletions pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,59 @@
# Regression test for #39317.

statement ok
CREATE TABLE l (a INT PRIMARY KEY)
CREATE TABLE l (a INT PRIMARY KEY, b INT)

statement ok
CREATE TABLE r (a INT PRIMARY KEY)
CREATE TABLE r (a INT PRIMARY KEY, b INT)

statement ok
INSERT INTO l VALUES (1), (2)
INSERT INTO l VALUES (1, 10), (2, 20), (3, 30)

statement ok
INSERT INTO r VALUES (2), (3)
INSERT INTO r VALUES (2, 200), (3, 300), (4, 400)

statement ok
ALTER TABLE l SPLIT AT VALUES (2)
ALTER TABLE l SPLIT AT VALUES (2), (3)

statement ok
ALTER TABLE r SPLIT AT VALUES (2)
ALTER TABLE r SPLIT AT VALUES (2), (3)

statement ok
ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2)
ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2), (ARRAY[3], 3)

statement ok
ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 1)
ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 1)

query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l]
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l] ORDER BY lease_holder
----
start_key end_key replicas lease_holder
NULL /2 {1} 1
/2 NULL {2} 2
/2 /3 {2} 2
/3 NULL {3} 3

query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r]
SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r] ORDER BY lease_holder
----
start_key end_key replicas lease_holder
/2 NULL {1} 1
NULL /2 {2} 2
/2 /3 {1} 1
/3 NULL {2} 2
NULL /2 {3} 3

query T
SELECT url FROM [EXPLAIN (DISTSQL) SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk09rgzAYxu_7FPLusrEUjXWXwKCXDlq2Ojp3Gh5S884K1kgSYaX43Yc6aHXWtWxsN_Pn97zPkwd3kEmBC75BDewVKBBwISSQKxmh1lJV282lmXgH5hBIsrww1XZIIJIKge3AJCZFYBDwVYpL5AKVXWkJNDxJa-lcJRuuthMFBJ5znmlm2e7Idu1LCEsCsjCfsnu11dZac71u60wohGVIQBseIzC3JEfs7XWKTCqBCkVLKazI7670ZHxEFeNcJhkqe9z2FmxzZNbD9D6w_JdgurTm_mwBBFJ8M1cTenN9p5J43XwCAb8wzKrzHObfZxv_IFuP8YUcydz2uo_QO9prjaant-70t57-buv0qL2_aN39p9Z7bC1R5zLTeFKpThUMRYzNQ2lZqAiflIzqMc3Sr7n6TQVq05x6zWKW1Uf1L3c6TIdh2oWdQ9htwbQLu4PwbQt2uvD4jMxfJg_DdBj2zsoclhcfAQAA___49Nlj
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk8Fro0AUxu_7V8jbyy47QWfiXgYWvGQhYTeW1J6Kh4nzagTjyMwIDcH_vaiFVGtsQkt7c2be73vvex8eoVAS12KPBvg9UCDAICZQapWgMUo3113RUj4C9whkRVnZ5jomkCiNwI9gM5sjcIjENscNConabbQkWpHlrXSps73Qh0ADgdtSFIY7Lpu5zP0OcU1AVfZZ9qS2PTg7YXZ9nYBCXMcEjBUpAmc1OTPeSacqlJaoUfaU4oZ8q2TE43_UKa5UVqB25_3ZokOJ3Pm3-Bs54V202DircLkGAjk-2B8B_fXzj87SXfcJBMLKciegJGAk8AdbODmcv8PhyPhrNVOl6w9XMdra77Wml2fvjWeff2z29Ox4n5E9-9LsR4bboClVYfCiaL3GHsoUu3UZVekEb7RK2jbdMWy5drMSje1e_e6wLNqn9ve7HKbTMB3C3kuY9WA6hNkk_LsHe0N4foXnV52nYToN-1d5jutvTwEAAP__eVLb3w==

query I
query III
SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2
----
2
2 20 200

# Test that the LEFT SEMI merge join outputs batches that contain only the
# columns from the left side.
query II rowsort
SELECT * FROM l WHERE EXISTS(SELECT * FROM r WHERE r.a=l.a)
----
2 20
3 30