diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index aff1415de4e8..a7b7535d7336 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -278,7 +278,7 @@ locationSpec ; query - : ctes? queryNoWith + : ctes? queryTerm queryOrganization ; insertInto @@ -380,10 +380,6 @@ dmlStatementNoWith | fromClause multiInsertQueryBody+ #multiInsertQuery ; -queryNoWith - : queryTerm queryOrganization - ; - queryOrganization : (ORDER BY order+=sortItem (',' order+=sortItem)*)? (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? @@ -412,7 +408,7 @@ queryPrimary | fromStatement #fromStmt | TABLE multipartIdentifier #table | inlineTable #inlineTableDefault1 - | '(' queryNoWith ')' #subquery + | '(' query ')' #subquery ; sortItem @@ -583,7 +579,7 @@ identifierComment relationPrimary : multipartIdentifier sample? tableAlias #tableName - | '(' queryNoWith ')' sample? tableAlias #aliasedQuery + | '(' query ')' sample? tableAlias #aliasedQuery | '(' relation ')' sample? tableAlias #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 60517f11a249..e7f8ca696007 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -226,12 +226,13 @@ class Analyzer( case other => other } - def substituteCTE(plan: LogicalPlan, cteName: String, ctePlan: LogicalPlan): LogicalPlan = { + private def substituteCTE( + plan: LogicalPlan, + cteName: String, + ctePlan: LogicalPlan): LogicalPlan = { plan resolveOperatorsUp { case UnresolvedRelation(Seq(table)) if resolver(cteName, table) => ctePlan - case u: UnresolvedRelation => - u case other => // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. other transformExpressions { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f08cb2a780fe..5eef8dbdfbff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -111,7 +111,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a top-level plan with Common Table Expressions. */ override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) { - val query = plan(ctx.queryNoWith) + val query = plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses) // Apply CTEs query.optionalMap(ctx.ctes)(withCTE) @@ -175,10 +175,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - override def visitQueryNoWith(ctx: QueryNoWithContext): LogicalPlan = withOrigin(ctx) { - plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses) - } - /** * Create a named logical plan. * @@ -890,7 +886,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a logical plan for a sub-query. */ override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) { - plan(ctx.queryNoWith) + plan(ctx.query) } /** @@ -978,7 +974,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { - val relation = plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample) + val relation = plan(ctx.query).optionalMap(ctx.sample)(withSample) if (ctx.tableAlias.strictIdentifier == null) { // For un-aliased subqueries, use a default alias name that is not likely to conflict with // normal subquery names, so that parent operators can only access the columns in subquery by diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index 822c5c4660e3..ac448eb2b27b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -4,6 +4,9 @@ create temporary view t2 as select * from values 0, 1 as t(id); -- WITH clause should not fall into infinite loop by referencing self WITH s AS (SELECT 1 FROM s) SELECT * FROM s; +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r; + -- WITH clause should reference the base table WITH t AS (SELECT 1 FROM t) SELECT * FROM t; @@ -28,6 +31,112 @@ FROM CTE1 t1 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1; +-- CTE in CTE definition +WITH t as ( + WITH t2 AS (SELECT 1) + SELECT * FROM t2 +) +SELECT * FROM t; + +-- CTE in subquery +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 1) + SELECT * FROM t +); + +-- CTE in subquery expression +SELECT ( + WITH t AS (SELECT 1) + SELECT * FROM t +); + +-- CTE in CTE definition shadows outer +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +SELECT * FROM t2; + +-- CTE in CTE definition shadows outer 2 +WITH + t(c) AS (SELECT 1), + t2 AS ( + SELECT ( + SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) + ) + ) +SELECT * FROM t2; + +-- CTE in CTE definition shadows outer 3 +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2), + t2 AS ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) + SELECT * FROM t2 + ) +SELECT * FROM t2; + +-- CTE in subquery shadows outer +WITH t(c) AS (SELECT 1) +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t +); + +-- CTE in subquery shadows outer 2 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) +); + +-- CTE in subquery shadows outer 3 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 3) + SELECT * FROM t + ) +); + +-- CTE in subquery expression shadows outer +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t +); + +-- CTE in subquery expression shadows outer 2 +WITH t AS (SELECT 1) +SELECT ( + SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +); + +-- CTE in subquery expression shadows outer 3 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) +); + -- Clean up DROP VIEW IF EXISTS t; DROP VIEW IF EXISTS t2; diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index f8ccecbc46f4..b89e29fd08dc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 23 -- !query 0 @@ -28,36 +28,46 @@ Table or view not found: s; line 1 pos 25 -- !query 3 -WITH t AS (SELECT 1 FROM t) SELECT * FROM t +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r -- !query 3 schema -struct<1:int> +struct<> -- !query 3 output +org.apache.spark.sql.AnalysisException +Table or view not found: r; line 1 pos 33 + + +-- !query 4 +WITH t AS (SELECT 1 FROM t) SELECT * FROM t +-- !query 4 schema +struct<1:int> +-- !query 4 output 1 1 1 --- !query 4 +-- !query 5 WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 --- !query 4 schema +-- !query 5 schema struct<> --- !query 4 output +-- !query 5 output org.apache.spark.sql.AnalysisException Table or view not found: s2; line 1 pos 26 --- !query 5 +-- !query 6 WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2 --- !query 5 schema +-- !query 6 schema struct --- !query 5 output +-- !query 6 output 0 2 0 2 1 2 1 2 --- !query 6 +-- !query 7 WITH CTE1 AS ( SELECT b.id AS id FROM T2 a @@ -67,9 +77,9 @@ SELECT t1.id AS c1, t2.id AS c2 FROM CTE1 t1 CROSS JOIN CTE1 t2 --- !query 6 schema +-- !query 7 schema struct --- !query 6 output +-- !query 7 output 0 0 0 0 0 0 @@ -88,26 +98,192 @@ struct 1 1 --- !query 7 +-- !query 8 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1 --- !query 7 schema +-- !query 8 schema struct --- !query 7 output +-- !query 8 output 1 --- !query 8 +-- !query 9 +WITH t as ( + WITH t2 AS (SELECT 1) + SELECT * FROM t2 +) +SELECT * FROM t +-- !query 9 schema +struct<1:int> +-- !query 9 output +1 + + +-- !query 10 +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 1) + SELECT * FROM t +) +-- !query 10 schema +struct +-- !query 10 output +1 + + +-- !query 11 +SELECT ( + WITH t AS (SELECT 1) + SELECT * FROM t +) +-- !query 11 schema +struct +-- !query 11 output +1 + + +-- !query 12 +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +SELECT * FROM t2 +-- !query 12 schema +struct<1:int> +-- !query 12 output +1 + + +-- !query 13 +WITH + t(c) AS (SELECT 1), + t2 AS ( + SELECT ( + SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) + ) + ) +SELECT * FROM t2 +-- !query 13 schema +struct +-- !query 13 output +1 + + +-- !query 14 +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2), + t2 AS ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) + SELECT * FROM t2 + ) +SELECT * FROM t2 +-- !query 14 schema +struct<2:int> +-- !query 14 output +2 + + +-- !query 15 +WITH t(c) AS (SELECT 1) +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t +) +-- !query 15 schema +struct +-- !query 15 output +2 + + +-- !query 16 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query 16 schema +struct +-- !query 16 output +2 + + +-- !query 17 +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query 17 schema +struct +-- !query 17 output +3 + + +-- !query 18 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t +) +-- !query 18 schema +struct +-- !query 18 output +1 + + +-- !query 19 +WITH t AS (SELECT 1) +SELECT ( + SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query 19 schema +struct +-- !query 19 output +1 + + +-- !query 20 +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query 20 schema +struct +-- !query 20 output +1 + + +-- !query 21 DROP VIEW IF EXISTS t --- !query 8 schema +-- !query 21 schema struct<> --- !query 8 output +-- !query 21 output --- !query 9 +-- !query 22 DROP VIEW IF EXISTS t2 --- !query 9 schema +-- !query 22 schema struct<> --- !query 9 output +-- !query 22 output