From b98865127a39bde885f9b1680cfe608629d59d51 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 29 Jul 2016 17:43:56 -0400 Subject: [PATCH 01/10] [SPARK-16804][SQL] Correlated subqueries containing LIMIT return incorrect results ## What changes were proposed in this pull request? This patch fixes the incorrect results in the rule ResolveSubquery in Catalyst's Analysis phase. ## How was this patch tested? ./dev/run-tests a new unit test on the problematic pattern. --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 ++++++++++ .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 2efa997ff22d..c3ee6517875c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1021,6 +1021,16 @@ class Analyzer( case e: Expand => failOnOuterReferenceInSubTree(e, "an EXPAND") e + case l @ LocalLimit(_, child) => + failOnOuterReferenceInSubTree(l, "LIMIT") + l + // Since LIMIT is represented as GlobalLimit(, (LocalLimit (, child)) + // and we are walking bottom up, we will fail on LocalLimit before + // reaching GlobalLimit. + // The code below is just a safety net. + case g @ GlobalLimit(_, child) => + failOnOuterReferenceInSubTree(g, "LIMIT") + g case p => failOnOuterReference(p) p diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ff112c51697a..b78a988eddbb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -533,5 +533,13 @@ class AnalysisErrorSuite extends AnalysisTest { Exists(Union(LocalRelation(b), Filter(EqualTo(OuterReference(a), c), LocalRelation(c)))), LocalRelation(a)) assertAnalysisError(plan3, "Accessing outer query column is not allowed in" :: Nil) + + val plan4 = Filter( + Exists( + Limit(1, + Filter(EqualTo(OuterReference(a), b), LocalRelation(b))) + ), + LocalRelation(a)) + assertAnalysisError(plan4, "Accessing outer query column is not allowed in LIMIT" :: Nil) } } From 069ed8f8e5f14dca7a15701945d42fc27fe82f3c Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 29 Jul 2016 17:50:02 -0400 Subject: [PATCH 02/10] [SPARK-16804][SQL] Correlated subqueries containing LIMIT return incorrect results ## What changes were proposed in this pull request? This patch fixes the incorrect results in the rule ResolveSubquery in Catalyst's Analysis phase. ## How was this patch tested? ./dev/run-tests a new unit test on the problematic pattern. --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c3ee6517875c..357c763f5946 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1022,14 +1022,14 @@ class Analyzer( failOnOuterReferenceInSubTree(e, "an EXPAND") e case l @ LocalLimit(_, child) => - failOnOuterReferenceInSubTree(l, "LIMIT") + failOnOuterReferenceInSubTree(l, "a LIMIT") l // Since LIMIT is represented as GlobalLimit(, (LocalLimit (, child)) // and we are walking bottom up, we will fail on LocalLimit before // reaching GlobalLimit. // The code below is just a safety net. case g @ GlobalLimit(_, child) => - failOnOuterReferenceInSubTree(g, "LIMIT") + failOnOuterReferenceInSubTree(g, "a LIMIT") g case p => failOnOuterReference(p) From edca333c081e6d4e53a91b496fba4a3ef4ee89ac Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 29 Jul 2016 20:28:15 -0400 Subject: [PATCH 03/10] New positive test cases --- .../org/apache/spark/sql/SubquerySuite.scala | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index afed342ff8e2..52387b4b72a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -571,4 +571,33 @@ class SubquerySuite extends QueryTest with SharedSQLContext { Row(1.0, false) :: Row(1.0, false) :: Row(2.0, true) :: Row(2.0, true) :: Row(3.0, false) :: Row(5.0, true) :: Row(null, false) :: Row(null, true) :: Nil) } + + test("SPARK-16804: Correlated subqueries containing LIMIT - 1") { + withTempView("onerow") { + Seq(1).toDF("c1").createOrReplaceTempView("onerow") + + checkAnswer( + sql( + """ + | select c1 from onerow t1 + | where exists (select 1 from onerow t2 where t1.c1=t2.c1) + | and exists (select 1 from onerow LIMIT 1)""".stripMargin), + Row(1) :: Nil) + } + } + + test("SPARK-16804: Correlated subqueries containing LIMIT - 2") { + withTempView("onerow") { + Seq(1).toDF("c1").createOrReplaceTempView("onerow") + + checkAnswer( + sql( + """ + | select c1 from onerow t1 + | where exists (select 1 + | from (select 1 from onerow t2 LIMIT 1) + | where t1.c1=t2.c1)""".stripMargin), + Row(1) :: Nil) + } + } } From 64184fdb77c1a305bb2932e82582da28bb4c0e53 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Mon, 1 Aug 2016 09:20:09 -0400 Subject: [PATCH 04/10] Fix unit test case failure --- .../apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index b78a988eddbb..c08de826bd94 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -540,6 +540,6 @@ class AnalysisErrorSuite extends AnalysisTest { Filter(EqualTo(OuterReference(a), b), LocalRelation(b))) ), LocalRelation(a)) - assertAnalysisError(plan4, "Accessing outer query column is not allowed in LIMIT" :: Nil) + assertAnalysisError(plan4, "Accessing outer query column is not allowed in a LIMIT" :: Nil) } } From 29f82b05c9e40e7934397257c674b260a8e8a996 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 5 Aug 2016 13:42:01 -0400 Subject: [PATCH 05/10] blocking TABLESAMPLE --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 +++++-- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 357c763f5946..9d99c4173d4a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1021,16 +1021,19 @@ class Analyzer( case e: Expand => failOnOuterReferenceInSubTree(e, "an EXPAND") e - case l @ LocalLimit(_, child) => + case l @ LocalLimit(_, _) => failOnOuterReferenceInSubTree(l, "a LIMIT") l // Since LIMIT is represented as GlobalLimit(, (LocalLimit (, child)) // and we are walking bottom up, we will fail on LocalLimit before // reaching GlobalLimit. // The code below is just a safety net. - case g @ GlobalLimit(_, child) => + case g @ GlobalLimit(_, _) => failOnOuterReferenceInSubTree(g, "a LIMIT") g + case s @ Sample(_, _, _, _, _) => + failOnOuterReferenceInSubTree(s, "a TABLESAMPLE") + s case p => failOnOuterReference(p) p diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c08de826bd94..0b7d681be511 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -541,5 +541,13 @@ class AnalysisErrorSuite extends AnalysisTest { ), LocalRelation(a)) assertAnalysisError(plan4, "Accessing outer query column is not allowed in a LIMIT" :: Nil) + + val plan5 = Filter( + Exists( + Sample(0.0, 0.5, false, 1L, + Filter(EqualTo(OuterReference(a), b), LocalRelation(b)))().select('b) + ), + LocalRelation(a)) + assertAnalysisError(plan5, "Accessing outer query column is not allowed in a TABLESAMPLE" :: Nil) } } From ac43ab47907a1ccd6d22f920415fbb4de93d4720 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 5 Aug 2016 17:10:19 -0400 Subject: [PATCH 06/10] Fixing code styling --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9d99c4173d4a..29ede7048a2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1021,17 +1021,17 @@ class Analyzer( case e: Expand => failOnOuterReferenceInSubTree(e, "an EXPAND") e - case l @ LocalLimit(_, _) => + case l : LocalLimit => failOnOuterReferenceInSubTree(l, "a LIMIT") l // Since LIMIT is represented as GlobalLimit(, (LocalLimit (, child)) // and we are walking bottom up, we will fail on LocalLimit before // reaching GlobalLimit. // The code below is just a safety net. - case g @ GlobalLimit(_, _) => + case g : GlobalLimit => failOnOuterReferenceInSubTree(g, "a LIMIT") g - case s @ Sample(_, _, _, _, _) => + case s : Sample => failOnOuterReferenceInSubTree(s, "a TABLESAMPLE") s case p => From 631d396031e8bf627eb1f4872a4d3a17c144536c Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Sun, 7 Aug 2016 14:39:44 -0400 Subject: [PATCH 07/10] Correcting Scala test style --- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 0b7d681be511..8935d979414a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -548,6 +548,7 @@ class AnalysisErrorSuite extends AnalysisTest { Filter(EqualTo(OuterReference(a), b), LocalRelation(b)))().select('b) ), LocalRelation(a)) - assertAnalysisError(plan5, "Accessing outer query column is not allowed in a TABLESAMPLE" :: Nil) + assertAnalysisError(plan5, + "Accessing outer query column is not allowed in a TABLESAMPLE" :: Nil) } } From 7eb9b2dbba3633a1958e38e0019e3ce816300514 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Sun, 7 Aug 2016 22:31:09 -0400 Subject: [PATCH 08/10] One (last) attempt to correct the Scala style tests --- .../apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 8935d979414a..6438065fb292 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -548,7 +548,7 @@ class AnalysisErrorSuite extends AnalysisTest { Filter(EqualTo(OuterReference(a), b), LocalRelation(b)))().select('b) ), LocalRelation(a)) - assertAnalysisError(plan5, + assertAnalysisError(plan5, "Accessing outer query column is not allowed in a TABLESAMPLE" :: Nil) } } From 092f2a585fa5d58054cf59a204acc728dffc66b4 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Fri, 3 Feb 2017 20:11:59 -0500 Subject: [PATCH 09/10] new test file --- .../sql-tests/inputs/scalar-subquery.sql | 20 - .../scalar-subquery-predicate.sql | 255 +++++++++++ .../sql-tests/results/scalar-subquery.sql.out | 46 -- .../scalar-subquery-predicate.sql.out | 407 ++++++++++++++++++ 4 files changed, 662 insertions(+), 66 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/scalar-subquery.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql delete mode 100644 sql/core/src/test/resources/sql-tests/results/scalar-subquery.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/scalar-subquery.sql b/sql/core/src/test/resources/sql-tests/inputs/scalar-subquery.sql deleted file mode 100644 index 3acc9db09cb8..000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/scalar-subquery.sql +++ /dev/null @@ -1,20 +0,0 @@ -CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv); -CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv); - --- SPARK-18814.1: Simplified version of TPCDS-Q32 -SELECT pk, cv -FROM p, c -WHERE p.pk = c.ck -AND c.cv = (SELECT avg(c1.cv) - FROM c c1 - WHERE c1.ck = p.pk); - --- SPARK-18814.2: Adding stack of aggregates -SELECT pk, cv -FROM p, c -WHERE p.pk = c.ck -AND c.cv = (SELECT max(avg) - FROM (SELECT c1.cv, avg(c1.cv) avg - FROM c c1 - WHERE c1.ck = p.pk - GROUP BY c1.cv)); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql new file mode 100644 index 000000000000..716ee30bef62 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -0,0 +1,255 @@ +-- A test suite for scalar subquery in predicate context + +CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv); +CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv); + +-- SPARK-18814.1: Simplified version of TPCDS-Q32 +SELECT pk, cv +FROM p, c +WHERE p.pk = c.ck +AND c.cv = (SELECT avg(c1.cv) + FROM c c1 + WHERE c1.ck = p.pk); + +-- SPARK-18814.2: Adding stack of aggregates +SELECT pk, cv +FROM p, c +WHERE p.pk = c.ck +AND c.cv = (SELECT max(avg) + FROM (SELECT c1.cv, avg(c1.cv) avg + FROM c c1 + WHERE c1.ck = p.pk + GROUP BY c1.cv)); + +create temporary view t1 as select * from values + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), + ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), + ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), + ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); + +create temporary view t2 as select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); + +create temporary view t3 as select * from values + ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), + ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); + +-- Group 1: scalar subquery in predicate context +-- no correlation +-- TC 01.01 +SELECT t1a, t1b +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2); + +-- TC 01.02 +SELECT t1a, t1d, t1f +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2) +AND t1b > (SELECT min(t3b) + FROM t3); + +-- TC 01.03 +SELECT t1a, t1h +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2) +OR t1b = (SELECT min(t3b) + FROM t3 + WHERE t3b > 10); + +-- TC 01.04 +-- scalar subquery over outer join +SELECT t1a, t1b, t2d +FROM t1 LEFT JOIN t2 + ON t1a = t2a +WHERE t1b = (SELECT min(t3b) + FROM t3); + +-- TC 01.05 +-- test casting +SELECT t1a, t1b, t1g +FROM t1 +WHERE t1c + 5 = (SELECT max(t2e) + FROM t2); + +-- TC 01.06 +-- test casting +SELECT t1a, t1h +FROM t1 +WHERE date(t1h) = (SELECT min(t2i) + FROM t2); + +-- TC 01.07 +-- same table, expressions in scalar subquery +SELECT t2d, t1a +FROM t1, t2 +WHERE t1b = t2b +AND t2c + 1 = (SELECT max(t2c) + 1 + FROM t2, t1 + WHERE t2b = t1b); + +-- TC 01.08 +-- same table +SELECT DISTINCT t2a, max_t1g +FROM t2, (SELECT max(t1g) max_t1g, t1a + FROM t1 + GROUP BY t1a) t1 +WHERE t2a = t1a +AND max_t1g = (SELECT max(t1g) + FROM t1); + +-- TC 01.09 +-- more than one scalar subquery +SELECT t3b, t3c +FROM t3 +WHERE (SELECT max(t3c) + FROM t3 + WHERE t3b > 10) >= + (SELECT min(t3b) + FROM t3 + WHERE t3c > 0) +AND (t3b is null or t3c is null); + +-- Group 2: scalar subquery in predicate context +-- with correlation +-- TC 02.01 +SELECT t1a +FROM t1 +WHERE t1a < (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.02 +SELECT t1a, t1c +FROM t1 +WHERE (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) IS NULL; + +-- TC 02.03 +SELECT t1a +FROM t1 +WHERE t1a = (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c + HAVING count(*) >= 0) +OR t1i > '2014-12-31'; + +-- TC 02.04 +-- t1 on the right of an outer join +-- can be reduced to inner join +SELECT count(t1a) +FROM t1 RIGHT JOIN t2 +ON t1d = t2d +WHERE t1a < (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.05 +SELECT t1a +FROM t1 +WHERE t1b <= (SELECT max(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +AND t1b >= (SELECT min(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.06 +-- set op +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +INTERSECT +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.07 +-- set op +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +UNION ALL +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.08 +-- set op +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +MINUS +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + +-- TC 02.09 +-- in HAVING clause +SELECT t1a +FROM t1 +GROUP BY t1a, t1c +HAVING max(t1b) <= (SELECT max(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); diff --git a/sql/core/src/test/resources/sql-tests/results/scalar-subquery.sql.out b/sql/core/src/test/resources/sql-tests/results/scalar-subquery.sql.out deleted file mode 100644 index c249329d6a61..000000000000 --- a/sql/core/src/test/resources/sql-tests/results/scalar-subquery.sql.out +++ /dev/null @@ -1,46 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 4 - - --- !query 0 -CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv) --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv) --- !query 1 schema -struct<> --- !query 1 output - - - --- !query 2 -SELECT pk, cv -FROM p, c -WHERE p.pk = c.ck -AND c.cv = (SELECT avg(c1.cv) - FROM c c1 - WHERE c1.ck = p.pk) --- !query 2 schema -struct --- !query 2 output -1 1 - - --- !query 3 -SELECT pk, cv -FROM p, c -WHERE p.pk = c.ck -AND c.cv = (SELECT max(avg) - FROM (SELECT c1.cv, avg(c1.cv) avg - FROM c c1 - WHERE c1.ck = p.pk - GROUP BY c1.cv)) --- !query 3 schema -struct --- !query 3 output -1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out new file mode 100644 index 000000000000..992959be6c6a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -0,0 +1,407 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query 0 +CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +SELECT pk, cv +FROM p, c +WHERE p.pk = c.ck +AND c.cv = (SELECT avg(c1.cv) + FROM c c1 + WHERE c1.ck = p.pk) +-- !query 2 schema +struct +-- !query 2 output +1 1 + + +-- !query 3 +SELECT pk, cv +FROM p, c +WHERE p.pk = c.ck +AND c.cv = (SELECT max(avg) + FROM (SELECT c1.cv, avg(c1.cv) avg + FROM c c1 + WHERE c1.ck = p.pk + GROUP BY c1.cv)) +-- !query 3 schema +struct +-- !query 3 output +1 1 + + +-- !query 4 +create temporary view t1 as select * from values + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), + ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), + ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), + ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) +-- !query 4 schema +struct<> +-- !query 4 output + + + +-- !query 5 +create temporary view t2 as select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) +-- !query 5 schema +struct<> +-- !query 5 output + + + +-- !query 6 +create temporary view t3 as select * from values + ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), + ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) +-- !query 6 schema +struct<> +-- !query 6 output + + + +-- !query 7 +SELECT t1a, t1b +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2) +-- !query 7 schema +struct +-- !query 7 output +val1b 8 +val1c 8 +val1d NULL +val1d NULL + + +-- !query 8 +SELECT t1a, t1d, t1f +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2) +AND t1b > (SELECT min(t3b) + FROM t3) +-- !query 8 schema +struct +-- !query 8 output +val1b 19 25.0 +val1c 19 25.0 + + +-- !query 9 +SELECT t1a, t1h +FROM t1 +WHERE t1c = (SELECT max(t2c) + FROM t2) +OR t1b = (SELECT min(t3b) + FROM t3 + WHERE t3b > 10) +-- !query 9 schema +struct +-- !query 9 output +val1b 2014-05-04 01:01:00 +val1c 2014-05-04 01:02:00.001 +val1d 2014-06-04 01:01:00 +val1d 2014-07-04 01:02:00.001 + + +-- !query 10 +SELECT t1a, t1b, t2d +FROM t1 LEFT JOIN t2 + ON t1a = t2a +WHERE t1b = (SELECT min(t3b) + FROM t3) +-- !query 10 schema +struct +-- !query 10 output +val1a 6 NULL +val1a 6 NULL + + +-- !query 11 +SELECT t1a, t1b, t1g +FROM t1 +WHERE t1c + 5 = (SELECT max(t2e) + FROM t2) +-- !query 11 schema +struct +-- !query 11 output +val1a 16 2000 +val1a 16 2000 + + +-- !query 12 +SELECT t1a, t1h +FROM t1 +WHERE date(t1h) = (SELECT min(t2i) + FROM t2) +-- !query 12 schema +struct +-- !query 12 output +val1a 2014-04-04 00:00:00 +val1a 2014-04-04 01:02:00.001 + + +-- !query 13 +SELECT t2d, t1a +FROM t1, t2 +WHERE t1b = t2b +AND t2c + 1 = (SELECT max(t2c) + 1 + FROM t2, t1 + WHERE t2b = t1b) +-- !query 13 schema +struct +-- !query 13 output +119 val1b +119 val1c +19 val1b +19 val1c + + +-- !query 14 +SELECT DISTINCT t2a, max_t1g +FROM t2, (SELECT max(t1g) max_t1g, t1a + FROM t1 + GROUP BY t1a) t1 +WHERE t2a = t1a +AND max_t1g = (SELECT max(t1g) + FROM t1) +-- !query 14 schema +struct +-- !query 14 output +val1b 2600 +val1c 2600 +val1e 2600 + + +-- !query 15 +SELECT t3b, t3c +FROM t3 +WHERE (SELECT max(t3c) + FROM t3 + WHERE t3b > 10) >= + (SELECT min(t3b) + FROM t3 + WHERE t3c > 0) +AND (t3b is null or t3c is null) +-- !query 15 schema +struct +-- !query 15 output +8 NULL +8 NULL +NULL 16 +NULL 16 + + +-- !query 16 +SELECT t1a +FROM t1 +WHERE t1a < (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 16 schema +struct +-- !query 16 output +val1a +val1a +val1b + + +-- !query 17 +SELECT t1a, t1c +FROM t1 +WHERE (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) IS NULL +-- !query 17 schema +struct +-- !query 17 output +val1a 8 +val1a 8 +val1d NULL +val1e NULL +val1e NULL +val1e NULL + + +-- !query 18 +SELECT t1a +FROM t1 +WHERE t1a = (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c + HAVING count(*) >= 0) +OR t1i > '2014-12-31' +-- !query 18 schema +struct +-- !query 18 output +val1c +val1d + + +-- !query 19 +SELECT count(t1a) +FROM t1 RIGHT JOIN t2 +ON t1d = t2d +WHERE t1a < (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 19 schema +struct +-- !query 19 output +7 + + +-- !query 20 +SELECT t1a +FROM t1 +WHERE t1b <= (SELECT max(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +AND t1b >= (SELECT min(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 20 schema +struct +-- !query 20 output +val1b +val1c + + +-- !query 21 +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +INTERSECT +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 21 schema +struct +-- !query 21 output +val1b +val1c + + +-- !query 22 +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +UNION ALL +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 22 schema +struct +-- !query 22 output +val1a +val1a +val1b +val1b +val1c +val1c +val1d +val1d + + +-- !query 23 +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +MINUS +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 23 schema +struct +-- !query 23 output +val1a + + +-- !query 24 +SELECT t1a +FROM t1 +GROUP BY t1a, t1c +HAVING max(t1b) <= (SELECT max(t2b) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 24 schema +struct +-- !query 24 output +val1b +val1c From 044d6a4927f77bb50631d74830a7a999a6d6fd26 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Sun, 5 Feb 2017 15:53:53 -0500 Subject: [PATCH 10/10] Add UNION DISTINCT --- .../scalar-subquery-predicate.sql | 18 ++++++++++- .../scalar-subquery-predicate.sql.out | 31 ++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index 716ee30bef62..fb0d07fbdace 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -212,7 +212,7 @@ WHERE t1a >= (SELECT min(t2a) WHERE t2c = t1c GROUP BY t2c); --- TC 02.07 +-- TC 02.07.01 -- set op SELECT t1a FROM t1 @@ -228,6 +228,22 @@ WHERE t1a >= (SELECT min(t2a) WHERE t2c = t1c GROUP BY t2c); +-- TC 02.07.02 +-- set op +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +UNION DISTINCT +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c); + -- TC 02.08 -- set op SELECT t1a diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index 992959be6c6a..8b29300e71f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 25 +-- Number of queries: 26 -- !query 0 @@ -379,7 +379,7 @@ WHERE t1a <= (SELECT max(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) -MINUS +UNION DISTINCT SELECT t1a FROM t1 WHERE t1a >= (SELECT min(t2a) @@ -390,9 +390,32 @@ WHERE t1a >= (SELECT min(t2a) struct -- !query 23 output val1a +val1b +val1c +val1d -- !query 24 +SELECT t1a +FROM t1 +WHERE t1a <= (SELECT max(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +MINUS +SELECT t1a +FROM t1 +WHERE t1a >= (SELECT min(t2a) + FROM t2 + WHERE t2c = t1c + GROUP BY t2c) +-- !query 24 schema +struct +-- !query 24 output +val1a + + +-- !query 25 SELECT t1a FROM t1 GROUP BY t1a, t1c @@ -400,8 +423,8 @@ HAVING max(t1b) <= (SELECT max(t2b) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 24 schema +-- !query 25 schema struct --- !query 24 output +-- !query 25 output val1b val1c