From 5a7955f6e455300e03bc4a816834589340f1fd90 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Sat, 3 Aug 2019 08:25:25 -0700 Subject: [PATCH] exec: fix output batches of LEFT SEMI for hash and merge joiners Previously, the merge joiner's output batch would always have the columns corresponding to both the left and the right sides (even with LEFT SEMI and LEFT ANTI join types although the right side output would not be used). This is incorrect, and now the merge joiner outputs batches with the correct number of columns. A similar issue was present with LEFT SEMI hash joiner and is now fixed. Release note: None --- pkg/sql/exec/hashjoiner.go | 13 +++++- pkg/sql/exec/mergejoiner.go | 10 ++++- .../testdata/logic_test/exec_hash_join_dist | 30 ++++++++++++++ .../testdata/logic_test/exec_merge_join_dist | 41 +++++++++++-------- 4 files changed, 76 insertions(+), 18 deletions(-) create mode 100644 pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist diff --git a/pkg/sql/exec/hashjoiner.go b/pkg/sql/exec/hashjoiner.go index 492d6cffc4c8..5c6404c3895b 100644 --- a/pkg/sql/exec/hashjoiner.go +++ b/pkg/sql/exec/hashjoiner.go @@ -745,10 +745,21 @@ func makeHashJoinProber( var outColTypes []coltypes.T var buildColOffset, probeColOffset uint32 if buildRightSide { - outColTypes = append(probe.sourceTypes, build.sourceTypes...) + if len(build.outCols) == 0 { + // We do not have output columns from the right side in case of LEFT SEMI + // and LEFT ANTI joins, and we should not have the corresponding columns + // in the output batch, so we only have the types from the left side in + // outColTypes. + outColTypes = probe.sourceTypes + } else { + outColTypes = append(probe.sourceTypes, build.sourceTypes...) + } buildColOffset = uint32(len(probe.sourceTypes)) probeColOffset = 0 } else { + // Note that we don't need to check whether probe.outCols is non-empty + // before populating outColTypes because LEFT SEMI and LEFT ANTI joins will + // always build the right side. outColTypes = append(build.sourceTypes, probe.sourceTypes...) buildColOffset = 0 probeColOffset = nBuildCols diff --git a/pkg/sql/exec/mergejoiner.go b/pkg/sql/exec/mergejoiner.go index 49087a938c08..5c9f305d7afd 100644 --- a/pkg/sql/exec/mergejoiner.go +++ b/pkg/sql/exec/mergejoiner.go @@ -327,7 +327,15 @@ func (o *mergeJoinBase) Init() { func (o *mergeJoinBase) initWithBatchSize(outBatchSize uint16) { outColTypes := make([]coltypes.T, len(o.left.sourceTypes)+len(o.right.sourceTypes)) copy(outColTypes, o.left.sourceTypes) - copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes) + if len(o.right.outCols) == 0 { + // We do not have output columns from the right input in case of LEFT SEMI + // and LEFT ANTI joins, and we should not have the corresponding columns in + // the output batch, so we only have the types from the left input in + // outColTypes. + outColTypes = outColTypes[:len(o.left.sourceTypes)] + } else { + copy(outColTypes[len(o.left.sourceTypes):], o.right.sourceTypes) + } o.output = coldata.NewMemBatchWithSize(outColTypes, int(outBatchSize)) o.left.source.Init() diff --git a/pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist b/pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist new file mode 100644 index 000000000000..babcd0a123da --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/exec_hash_join_dist @@ -0,0 +1,30 @@ +# LogicTest: 5node-dist-vec + +statement ok +CREATE TABLE t (k INT, v INT) + +statement ok +INSERT INTO t VALUES (1, 10), (2, 20), (3, 30) + +statement ok +ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[3], 1) + +statement ok +CREATE TABLE xy (x INT PRIMARY KEY, y INT) + +statement ok +INSERT INTO xy VALUES (2, 200), (3, 300), (4, 400) + +statement ok +ALTER TABLE t SPLIT AT VALUES (3), (4) + +statement ok +ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 4) + +# Test that LEFT SEMI hash join outputs batches only with the columns from the +# left side. +query II rowsort +SELECT * FROM t WHERE EXISTS(SELECT * FROM xy WHERE x=t.k) +---- +2 20 +3 30 diff --git a/pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist b/pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist index eaadc62eb6a5..cc1b7d1fdf06 100644 --- a/pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist +++ b/pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist @@ -3,50 +3,59 @@ # Regression test for #39317. statement ok -CREATE TABLE l (a INT PRIMARY KEY) +CREATE TABLE l (a INT PRIMARY KEY, b INT) statement ok -CREATE TABLE r (a INT PRIMARY KEY) +CREATE TABLE r (a INT PRIMARY KEY, b INT) statement ok -INSERT INTO l VALUES (1), (2) +INSERT INTO l VALUES (1, 10), (2, 20), (3, 30) statement ok -INSERT INTO r VALUES (2), (3) +INSERT INTO r VALUES (2, 200), (3, 300), (4, 400) statement ok -ALTER TABLE l SPLIT AT VALUES (2) +ALTER TABLE l SPLIT AT VALUES (2), (3) statement ok -ALTER TABLE r SPLIT AT VALUES (2) +ALTER TABLE r SPLIT AT VALUES (2), (3) statement ok -ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2) +ALTER TABLE l EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1), (ARRAY[2], 2), (ARRAY[3], 3) statement ok -ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 1) +ALTER TABLE r EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 2), (ARRAY[2], 3), (ARRAY[3], 1) query TTTI colnames -SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l] +SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE l] ORDER BY lease_holder ---- start_key end_key replicas lease_holder NULL /2 {1} 1 -/2 NULL {2} 2 +/2 /3 {2} 2 +/3 NULL {3} 3 query TTTI colnames -SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r] +SELECT start_key, end_key, replicas, lease_holder from [SHOW EXPERIMENTAL_RANGES FROM TABLE r] ORDER BY lease_holder ---- start_key end_key replicas lease_holder -/2 NULL {1} 1 -NULL /2 {2} 2 +/2 /3 {1} 1 +/3 NULL {2} 2 +NULL /2 {3} 3 query T SELECT url FROM [EXPLAIN (DISTSQL) SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2] ---- -https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk09rgzAYxu_7FPLusrEUjXWXwKCXDlq2Ojp3Gh5S884K1kgSYaX43Yc6aHXWtWxsN_Pn97zPkwd3kEmBC75BDewVKBBwISSQKxmh1lJV282lmXgH5hBIsrww1XZIIJIKge3AJCZFYBDwVYpL5AKVXWkJNDxJa-lcJRuuthMFBJ5znmlm2e7Idu1LCEsCsjCfsnu11dZac71u60wohGVIQBseIzC3JEfs7XWKTCqBCkVLKazI7670ZHxEFeNcJhkqe9z2FmxzZNbD9D6w_JdgurTm_mwBBFJ8M1cTenN9p5J43XwCAb8wzKrzHObfZxv_IFuP8YUcydz2uo_QO9prjaant-70t57-buv0qL2_aN39p9Z7bC1R5zLTeFKpThUMRYzNQ2lZqAiflIzqMc3Sr7n6TQVq05x6zWKW1Uf1L3c6TIdh2oWdQ9htwbQLu4PwbQt2uvD4jMxfJg_DdBj2zsoclhcfAQAA___49Nlj +https://cockroachdb.github.io/distsqlplan/decode.html#eJzEk8Fro0AUxu_7V8jbyy47QWfiXgYWvGQhYTeW1J6Kh4nzagTjyMwIDcH_vaiFVGtsQkt7c2be73vvex8eoVAS12KPBvg9UCDAICZQapWgMUo3113RUj4C9whkRVnZ5jomkCiNwI9gM5sjcIjENscNConabbQkWpHlrXSps73Qh0ADgdtSFIY7Lpu5zP0OcU1AVfZZ9qS2PTg7YXZ9nYBCXMcEjBUpAmc1OTPeSacqlJaoUfaU4oZ8q2TE43_UKa5UVqB25_3ZokOJ3Pm3-Bs54V202DircLkGAjk-2B8B_fXzj87SXfcJBMLKciegJGAk8AdbODmcv8PhyPhrNVOl6w9XMdra77Wml2fvjWeff2z29Ox4n5E9-9LsR4bboClVYfCiaL3GHsoUu3UZVekEb7RK2jbdMWy5drMSje1e_e6wLNqn9ve7HKbTMB3C3kuY9WA6hNkk_LsHe0N4foXnV52nYToN-1d5jutvTwEAAP__eVLb3w== -query I +query III SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 2 ---- -2 +2 20 200 +# Test that LEFT SEMI merge join outputs batches only with the columns from the +# left side. +query II rowsort +SELECT * FROM l WHERE EXISTS(SELECT * FROM r WHERE r.a=l.a) +---- +2 20 +3 30