diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql index f06be5fc6aa5..cd350a98e130 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql @@ -51,6 +51,18 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); +create temporary view s1 as select * from values + (1), (3), (5), (7), (9) + as s1(id); + +create temporary view s2 as select * from values + (1), (3), (4), (6), (9) + as s2(id); + +create temporary view s3 as select * from values + (3), (4), (6), (9) + as s3(id); + -- correlated IN subquery -- different JOIN in parent side -- TC 01.01 @@ -272,3 +284,101 @@ Group By t1a, t1b, t1c, t2a, t2b, t2c HAVING t2c IS NOT NULL ORDER By t2b DESC nulls last; + +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT 9); + + +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT 9); + + +-- IN with Subquery ON INNER JOIN +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT SEMI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT ANTI JOIN +SELECT s1.id as id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT OUTER JOIN +SELECT s1.id, s2.id as id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON RIGHT OUTER JOIN +SELECT s1.id, s2.id as id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON FULL OUTER JOIN +SELECT s1.id, s2.id 
AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON INNER JOIN +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT SEMI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT ANTI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON RIGHT OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON FULL OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +DROP VIEW s1; + +DROP VIEW s2; + +DROP VIEW s3; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out index 7089fbd25e28..fbb4dd3b5efd 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 14 +-- Number of queries: 34 -- !query 0 @@ -64,9 +64,34 @@ create temporary view t3 as select * from values struct<> -- !query 2 output +-- !query 3 +create temporary view s1 as select * from values + (1), (3), (5), (7), (9) + as s1(id) +-- !query 3 schema +struct<> +-- !query 3 output --- !query 3 +-- !query 4 +create temporary view s2 as select * from values + (1), (3), (4), (6), (9) + as s2(id) +-- !query 4 schema +struct<> +-- !query 4 
output + + +-- !query 5 +create temporary view s3 as select * from values + (3), (4), (6), (9) + as s3(id) +-- !query 5 schema +struct<> +-- !query 5 output + + +-- !query 6 SELECT t1a, t1b, t1c, t3a, t3b, t3c FROM t1 natural JOIN t3 WHERE t1a IN (SELECT t2a @@ -77,14 +102,14 @@ WHERE t1a IN (SELECT t2a ORDER BY t1a, t1b, t1c DESC nulls first --- !query 3 schema +-- !query 6 schema struct --- !query 3 output +-- !query 6 output val1b 8 16 val1b 8 16 val1b 8 16 val1b 8 16 --- !query 4 +-- !query 7 SELECT Count(DISTINCT(t1a)), t1b, t3a, @@ -103,9 +128,9 @@ GROUP BY t1a, t3b, t3c ORDER BY t1a DESC, t3b DESC, t3c ASC --- !query 4 schema +-- !query 7 schema struct --- !query 4 output +-- !query 7 output 1 10 val3b 8 NULL 1 10 val1b 8 16 1 10 val3a 6 12 @@ -113,7 +138,7 @@ struct 1 8 val3a 6 12 --- !query 5 +-- !query 8 SELECT Count(DISTINCT(t1a)) FROM t1 natural right JOIN t3 WHERE t1a IN @@ -129,13 +154,13 @@ AND t1d IN AND t1a = t3a GROUP BY t1a ORDER BY t1a --- !query 5 schema +-- !query 8 schema struct --- !query 5 output +-- !query 8 output 1 --- !query 6 +-- !query 9 SELECT t1a, t1b, t1c, @@ -151,9 +176,9 @@ where t1a IN AND t1b != t3b AND t1a = 'val1b' ORDER BY t1a --- !query 6 schema +-- !query 9 schema struct --- !query 6 output +-- !query 9 output val1b 8 16 val3a 6 12 val1b 8 16 val3a 6 12 val1b 8 16 val1b 10 12 @@ -162,7 +187,7 @@ val1b 8 16 val3c 17 16 val1b 8 16 val3c 17 16 --- !query 7 +-- !query 10 SELECT Count(DISTINCT(t1a)), t1b FROM t1 RIGHT JOIN t3 @@ -181,13 +206,13 @@ GROUP BY t1a, t1b HAVING t1b > 8 ORDER BY t1a --- !query 7 schema +-- !query 10 schema struct --- !query 7 output +-- !query 10 output 1 10 --- !query 8 +-- !query 11 SELECT Count(DISTINCT(t1a)) FROM t1 LEFT OUTER JOIN t3 @@ -199,15 +224,15 @@ WHERE t1a IN WHERE t1h < t2h ) GROUP BY t1a ORDER BY t1a --- !query 8 schema +-- !query 11 schema struct --- !query 8 output +-- !query 11 output 1 1 1 --- !query 9 +-- !query 12 SELECT Count(DISTINCT(t1a)), t1b FROM t1 INNER JOIN t2 @@ -224,14 
+249,14 @@ OR t1a IN WHERE t2h < t1h) GROUP BY t1b HAVING t1b > 6 --- !query 9 schema +-- !query 12 schema struct --- !query 9 output +-- !query 12 output 1 10 1 8 --- !query 10 +-- !query 13 SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -249,13 +274,13 @@ AND t1h IN where t2b = t3b) GROUP BY t1b HAVING t1b > 8 --- !query 10 schema +-- !query 13 schema struct --- !query 10 output +-- !query 13 output 1 10 --- !query 11 +-- !query 14 SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -280,13 +305,13 @@ AND t1b IN GROUP BY t1b HAVING t1b > 8 --- !query 11 schema +-- !query 14 schema struct --- !query 11 output +-- !query 14 output 1 10 --- !query 12 +-- !query 15 SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -314,13 +339,13 @@ AND t1b IN AND t1a = t2a GROUP BY t1b ORDER BY t1b DESC --- !query 12 schema +-- !query 15 schema struct --- !query 12 output +-- !query 15 output 1 8 --- !query 13 +-- !query 16 SELECT t1a, t1b, t1c, @@ -345,9 +370,218 @@ and t1a = t2a Group By t1a, t1b, t1c, t2a, t2b, t2c HAVING t2c IS NOT NULL ORDER By t2b DESC nulls last --- !query 13 schema +-- !query 16 schema struct --- !query 13 output +-- !query 16 output val1b 8 16 1 10 12 val1b 8 16 1 8 16 val1b 8 16 1 NULL 16 + + + +-- !query 17 +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT 9) +-- !query 17 schema +struct +-- !query 17 output +9 + + +-- !query 18 +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT 9) +-- !query 18 schema +struct +-- !query 18 output +1 +3 + + +-- !query 19 +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 19 schema +struct +-- !query 19 output +3 +9 + + +-- !query 20 +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 20 schema +struct +-- !query 20 output +3 +9 + + +-- !query 21 +SELECT s1.id as id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 21 schema +struct +-- !query 21 output +1 +5 +7 + + +-- !query 22 
+SELECT s1.id, s2.id as id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 22 schema +struct +-- !query 22 output +1 NULL +3 3 +5 NULL +7 NULL +9 9 + + +-- !query 23 +SELECT s1.id, s2.id as id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 23 schema +struct +-- !query 23 output +3 3 +9 9 +NULL 1 +NULL 4 +NULL 6 + + +-- !query 24 +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query 24 schema +struct +-- !query 24 output +1 NULL +3 3 +5 NULL +7 NULL +9 9 +NULL 1 +NULL 4 +NULL 6 + + +-- !query 25 +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 25 schema +struct +-- !query 25 output +1 + + +-- !query 26 +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 26 schema +struct +-- !query 26 output +1 + + +-- !query 27 +SELECT s1.id AS id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 27 schema +struct +-- !query 27 output +3 +5 +7 +9 + + +-- !query 28 +SELECT s1.id, s2.id AS id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 28 schema +struct +-- !query 28 output +1 1 +3 NULL +5 NULL +7 NULL +9 NULL + + +-- !query 29 +SELECT s1.id, s2.id AS id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 29 schema +struct +-- !query 29 output +1 1 +NULL 3 +NULL 4 +NULL 6 +NULL 9 + +-- !query 30 +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query 30 schema +struct +-- !query 30 output +1 1 +3 NULL +5 NULL +7 NULL +9 NULL +NULL 3 +NULL 4 +NULL 6 +NULL 9 + + +-- !query 31 +DROP VIEW s1 +-- !query 31 schema +struct<> +-- !query 31 output + +-- !query 32 +DROP VIEW s2 +-- !query 32 schema +struct<> +-- !query 32 
output + +-- !query 33 +DROP VIEW s3 +-- !query 33 schema +struct<> +-- !query 33 output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index e05af08dfb74..c117ee7818c0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -204,154 +204,6 @@ class SubquerySuite extends QueryTest with SharedSparkSession { } } - test("SPARK-29145: JOIN Condition use QueryList") { - withTempView("s1", "s2", "s3") { - Seq(1, 3, 5, 7, 9).toDF("id").createOrReplaceTempView("s1") - Seq(1, 3, 4, 6, 9).toDF("id").createOrReplaceTempView("s2") - Seq(3, 4, 6, 9).toDF("id").createOrReplaceTempView("s3") - - checkAnswer( - sql( - """ - | SELECT s1.id FROM s1 - | JOIN s2 ON s1.id = s2.id - | AND s1.id IN (SELECT 9) - """.stripMargin), - Row(9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id FROM s1 - | JOIN s2 ON s1.id = s2.id - | AND s1.id NOT IN (SELECT 9) - """.stripMargin), - Row(1) :: Row(3) :: Nil) - - // case `IN` - checkAnswer( - sql( - """ - | SELECT s1.id FROM s1 - | JOIN s2 ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(3) :: Row(9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id AS id2 FROM s1 - | LEFT SEMI JOIN s2 - | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(3) :: Row(9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id as id2 FROM s1 - | LEFT ANTI JOIN s2 - | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(1) :: Row(5) :: Row(7) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id as id2 FROM s1 - | LEFT OUTER JOIN s2 - | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id as id2 FROM s1 - | RIGHT OUTER 
JOIN s2 - | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(null, 1) :: Row(3, 3) :: Row(null, 4) :: Row(null, 6) :: Row(9, 9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id AS id2 FROM s1 - | FULL OUTER JOIN s2 - | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3) - """.stripMargin), - Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: - Row(null, 1) :: Row(null, 4) :: Row(null, 6) :: Nil) - - // case `NOT IN` - checkAnswer( - sql( - """ - | SELECT s1.id FROM s1 - | JOIN s2 ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(1) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id AS id2 FROM s1 - | LEFT SEMI JOIN s2 - | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(1) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id AS id2 FROM s1 - | LEFT ANTI JOIN s2 - | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(3) :: Row(5) :: Row(7) :: Row(9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id AS id2 FROM s1 - | LEFT OUTER JOIN s2 - | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id AS id2 FROM s1 - | RIGHT OUTER JOIN s2 - | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(1, 1) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) - - checkAnswer( - sql( - """ - | SELECT s1.id, s2.id AS id2 FROM s1 - | FULL OUTER JOIN s2 - | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3) - """.stripMargin), - Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: - Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) - } - } - test("SPARK-14791: scalar subquery inside broadcast join") { val df = sql("select a, sum(b) as s from l 
group by a having a > (select avg(a) from l)") val expected = Row(3, 2.0, 3, 3.0) :: Row(6, null, 6, null) :: Nil