Skip to content

Commit f4ff2d1

Browse files
maryannxue and cloud-fan
authored and committed
[SPARK-40297][SQL] CTE outer reference nested in CTE main body cannot be resolved
### What changes were proposed in this pull request?

This PR fixes a bug where a CTE reference cannot be resolved if the reference occurs in an inner CTE definition nested in the FROM clause of the outer CTE's main body. E.g.,

```
WITH cte_outer AS (
  SELECT 1
)
SELECT * FROM (
  WITH cte_inner AS (
    SELECT * FROM cte_outer
  )
  SELECT * FROM cte_inner
)
```

The fix changes `CTESubstitution`'s traversal order from `resolveOperatorsUpWithPruning` to `resolveOperatorsDownWithPruning` and also recursively calls `traverseAndSubstituteCTE` on the CTE main body.

### Why are the changes needed?

Bug fix. Without the fix, an `AnalysisException` would be thrown for CTE queries like the one shown above.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added UTs.

Closes #37751 from maryannxue/spark-40297.

Authored-by: Maryann Xue <maryann.xue@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 25b51aa commit f4ff2d1

File tree

6 files changed

+476
-11
lines changed

6 files changed

+476
-11
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ object CTESubstitution extends Rule[LogicalPlan] {
5656
case _ => false
5757
}
5858
val cteDefs = ArrayBuffer.empty[CTERelationDef]
59-
val (substituted, lastSubstituted) =
59+
val (substituted, firstSubstituted) =
6060
LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match {
6161
case LegacyBehaviorPolicy.EXCEPTION =>
6262
assertNoNameConflictsInCTE(plan)
@@ -68,12 +68,17 @@ object CTESubstitution extends Rule[LogicalPlan] {
6868
}
6969
if (cteDefs.isEmpty) {
7070
substituted
71-
} else if (substituted eq lastSubstituted.get) {
71+
} else if (substituted eq firstSubstituted.get) {
7272
WithCTE(substituted, cteDefs.toSeq)
7373
} else {
7474
var done = false
7575
substituted.resolveOperatorsWithPruning(_ => !done) {
76-
case p if p eq lastSubstituted.get =>
76+
case p if p eq firstSubstituted.get =>
77+
// `firstSubstituted` is the parent of all other CTEs (if any).
78+
done = true
79+
WithCTE(p, cteDefs.toSeq)
80+
case p if p.children.count(_.containsPattern(CTE)) > 1 =>
81+
// This is the first common parent of all CTEs.
7782
done = true
7883
WithCTE(p, cteDefs.toSeq)
7984
}
@@ -181,21 +186,28 @@ object CTESubstitution extends Rule[LogicalPlan] {
181186
isCommand: Boolean,
182187
outerCTEDefs: Seq[(String, CTERelationDef)],
183188
cteDefs: ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = {
184-
var lastSubstituted: Option[LogicalPlan] = None
185-
val newPlan = plan.resolveOperatorsUpWithPruning(
189+
var firstSubstituted: Option[LogicalPlan] = None
190+
val newPlan = plan.resolveOperatorsDownWithPruning(
186191
_.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) {
187192
case UnresolvedWith(child: LogicalPlan, relations) =>
188193
val resolvedCTERelations =
189-
resolveCTERelations(relations, isLegacy = false, isCommand, outerCTEDefs, cteDefs)
190-
lastSubstituted = Some(substituteCTE(child, isCommand, resolvedCTERelations))
191-
lastSubstituted.get
194+
resolveCTERelations(relations, isLegacy = false, isCommand, outerCTEDefs, cteDefs) ++
195+
outerCTEDefs
196+
val substituted = substituteCTE(
197+
traverseAndSubstituteCTE(child, isCommand, resolvedCTERelations, cteDefs)._1,
198+
isCommand,
199+
resolvedCTERelations)
200+
if (firstSubstituted.isEmpty) {
201+
firstSubstituted = Some(substituted)
202+
}
203+
substituted
192204

193205
case other =>
194206
other.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) {
195207
case e: SubqueryExpression => e.withNewPlan(apply(e.plan))
196208
}
197209
}
198-
(newPlan, lastSubstituted)
210+
(newPlan, firstSubstituted)
199211
}
200212

201213
private def resolveCTERelations(

sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,61 @@ WITH
146146
)
147147
SELECT * FROM t3
148148
)
149-
SELECT * FROM t2;
149+
SELECT * FROM t2;
150+
151+
-- CTE nested in CTE main body FROM clause references outer CTE def
152+
WITH cte_outer AS (
153+
SELECT 1
154+
)
155+
SELECT * FROM (
156+
WITH cte_inner AS (
157+
SELECT * FROM cte_outer
158+
)
159+
SELECT * FROM cte_inner
160+
);
161+
162+
-- CTE double nested in CTE main body FROM clause references outer CTE def
163+
WITH cte_outer AS (
164+
SELECT 1
165+
)
166+
SELECT * FROM (
167+
WITH cte_inner AS (
168+
SELECT * FROM (
169+
WITH cte_inner_inner AS (
170+
SELECT * FROM cte_outer
171+
)
172+
SELECT * FROM cte_inner_inner
173+
)
174+
)
175+
SELECT * FROM cte_inner
176+
);
177+
178+
-- Invalid reference to invisible CTE def nested CTE def
179+
WITH cte_outer AS (
180+
WITH cte_invisible_inner AS (
181+
SELECT 1
182+
)
183+
SELECT * FROM cte_invisible_inner
184+
)
185+
SELECT * FROM (
186+
WITH cte_inner AS (
187+
SELECT * FROM cte_invisible_inner
188+
)
189+
SELECT * FROM cte_inner
190+
);
191+
192+
-- Invalid reference to invisible CTE def nested CTE def (in FROM)
193+
WITH cte_outer AS (
194+
SELECT * FROM (
195+
WITH cte_invisible_inner AS (
196+
SELECT 1
197+
)
198+
SELECT * FROM cte_invisible_inner
199+
)
200+
)
201+
SELECT * FROM (
202+
WITH cte_inner AS (
203+
SELECT * FROM cte_invisible_inner
204+
)
205+
SELECT * FROM cte_inner
206+
);

sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,83 @@ struct<>
233233
-- !query output
234234
org.apache.spark.sql.AnalysisException
235235
Table or view not found: t1; line 5 pos 20
236+
237+
238+
-- !query
239+
WITH cte_outer AS (
240+
SELECT 1
241+
)
242+
SELECT * FROM (
243+
WITH cte_inner AS (
244+
SELECT * FROM cte_outer
245+
)
246+
SELECT * FROM cte_inner
247+
)
248+
-- !query schema
249+
struct<1:int>
250+
-- !query output
251+
1
252+
253+
254+
-- !query
255+
WITH cte_outer AS (
256+
SELECT 1
257+
)
258+
SELECT * FROM (
259+
WITH cte_inner AS (
260+
SELECT * FROM (
261+
WITH cte_inner_inner AS (
262+
SELECT * FROM cte_outer
263+
)
264+
SELECT * FROM cte_inner_inner
265+
)
266+
)
267+
SELECT * FROM cte_inner
268+
)
269+
-- !query schema
270+
struct<>
271+
-- !query output
272+
org.apache.spark.sql.AnalysisException
273+
Table or view not found: cte_outer; line 8 pos 22
274+
275+
276+
-- !query
277+
WITH cte_outer AS (
278+
WITH cte_invisible_inner AS (
279+
SELECT 1
280+
)
281+
SELECT * FROM cte_invisible_inner
282+
)
283+
SELECT * FROM (
284+
WITH cte_inner AS (
285+
SELECT * FROM cte_invisible_inner
286+
)
287+
SELECT * FROM cte_inner
288+
)
289+
-- !query schema
290+
struct<>
291+
-- !query output
292+
org.apache.spark.sql.AnalysisException
293+
Table or view not found: cte_invisible_inner; line 9 pos 18
294+
295+
296+
-- !query
297+
WITH cte_outer AS (
298+
SELECT * FROM (
299+
WITH cte_invisible_inner AS (
300+
SELECT 1
301+
)
302+
SELECT * FROM cte_invisible_inner
303+
)
304+
)
305+
SELECT * FROM (
306+
WITH cte_inner AS (
307+
SELECT * FROM cte_invisible_inner
308+
)
309+
SELECT * FROM cte_inner
310+
)
311+
-- !query schema
312+
struct<>
313+
-- !query output
314+
org.apache.spark.sql.AnalysisException
315+
Table or view not found: cte_invisible_inner; line 11 pos 18

sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,82 @@ SELECT * FROM t2
240240
struct<1:int>
241241
-- !query output
242242
1
243+
244+
245+
-- !query
246+
WITH cte_outer AS (
247+
SELECT 1
248+
)
249+
SELECT * FROM (
250+
WITH cte_inner AS (
251+
SELECT * FROM cte_outer
252+
)
253+
SELECT * FROM cte_inner
254+
)
255+
-- !query schema
256+
struct<1:int>
257+
-- !query output
258+
1
259+
260+
261+
-- !query
262+
WITH cte_outer AS (
263+
SELECT 1
264+
)
265+
SELECT * FROM (
266+
WITH cte_inner AS (
267+
SELECT * FROM (
268+
WITH cte_inner_inner AS (
269+
SELECT * FROM cte_outer
270+
)
271+
SELECT * FROM cte_inner_inner
272+
)
273+
)
274+
SELECT * FROM cte_inner
275+
)
276+
-- !query schema
277+
struct<1:int>
278+
-- !query output
279+
1
280+
281+
282+
-- !query
283+
WITH cte_outer AS (
284+
WITH cte_invisible_inner AS (
285+
SELECT 1
286+
)
287+
SELECT * FROM cte_invisible_inner
288+
)
289+
SELECT * FROM (
290+
WITH cte_inner AS (
291+
SELECT * FROM cte_invisible_inner
292+
)
293+
SELECT * FROM cte_inner
294+
)
295+
-- !query schema
296+
struct<>
297+
-- !query output
298+
org.apache.spark.sql.AnalysisException
299+
Table or view not found: cte_invisible_inner; line 9 pos 18
300+
301+
302+
-- !query
303+
WITH cte_outer AS (
304+
SELECT * FROM (
305+
WITH cte_invisible_inner AS (
306+
SELECT 1
307+
)
308+
SELECT * FROM cte_invisible_inner
309+
)
310+
)
311+
SELECT * FROM (
312+
WITH cte_inner AS (
313+
SELECT * FROM cte_invisible_inner
314+
)
315+
SELECT * FROM cte_inner
316+
)
317+
-- !query schema
318+
struct<>
319+
-- !query output
320+
org.apache.spark.sql.AnalysisException
321+
Table or view not found: cte_invisible_inner; line 11 pos 18

sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,82 @@ SELECT * FROM t2
232232
struct<1:int>
233233
-- !query output
234234
1
235+
236+
237+
-- !query
238+
WITH cte_outer AS (
239+
SELECT 1
240+
)
241+
SELECT * FROM (
242+
WITH cte_inner AS (
243+
SELECT * FROM cte_outer
244+
)
245+
SELECT * FROM cte_inner
246+
)
247+
-- !query schema
248+
struct<1:int>
249+
-- !query output
250+
1
251+
252+
253+
-- !query
254+
WITH cte_outer AS (
255+
SELECT 1
256+
)
257+
SELECT * FROM (
258+
WITH cte_inner AS (
259+
SELECT * FROM (
260+
WITH cte_inner_inner AS (
261+
SELECT * FROM cte_outer
262+
)
263+
SELECT * FROM cte_inner_inner
264+
)
265+
)
266+
SELECT * FROM cte_inner
267+
)
268+
-- !query schema
269+
struct<1:int>
270+
-- !query output
271+
1
272+
273+
274+
-- !query
275+
WITH cte_outer AS (
276+
WITH cte_invisible_inner AS (
277+
SELECT 1
278+
)
279+
SELECT * FROM cte_invisible_inner
280+
)
281+
SELECT * FROM (
282+
WITH cte_inner AS (
283+
SELECT * FROM cte_invisible_inner
284+
)
285+
SELECT * FROM cte_inner
286+
)
287+
-- !query schema
288+
struct<>
289+
-- !query output
290+
org.apache.spark.sql.AnalysisException
291+
Table or view not found: cte_invisible_inner; line 9 pos 18
292+
293+
294+
-- !query
295+
WITH cte_outer AS (
296+
SELECT * FROM (
297+
WITH cte_invisible_inner AS (
298+
SELECT 1
299+
)
300+
SELECT * FROM cte_invisible_inner
301+
)
302+
)
303+
SELECT * FROM (
304+
WITH cte_inner AS (
305+
SELECT * FROM cte_invisible_inner
306+
)
307+
SELECT * FROM cte_inner
308+
)
309+
-- !query schema
310+
struct<>
311+
-- !query output
312+
org.apache.spark.sql.AnalysisException
313+
Table or view not found: cte_invisible_inner; line 11 pos 18

0 commit comments

Comments
 (0)