Skip to content

Commit 1272df2

Browse files
peter-toth authored and dongjoon-hyun committed
[SPARK-28002][SQL][FOLLOWUP] Fix duplicate CTE error message and add more test cases
## What changes were proposed in this pull request?

This PR fixes the error message reported for duplicate CTE names and adds some more WITH test cases as a follow-up to #24842.

## How was this patch tested?

Added new UTs.

Closes #24949 from peter-toth/SPARK-28002-follow-up.

Authored-by: Peter Toth <peter.toth@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
1 parent fe75ff8 commit 1272df2

File tree

4 files changed

+121
-44
lines changed

4 files changed

+121
-44
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
129129
(namedQuery.alias, namedQuery)
130130
}
131131
// Check for duplicate names.
132-
checkDuplicateKeys(ctes, ctx)
132+
val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys
133+
if (duplicates.nonEmpty) {
134+
throw new ParseException(
135+
s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.",
136+
ctx)
137+
}
133138
With(plan, ctes)
134139
}
135140

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ class PlanParserSuite extends AnalysisTest {
100100
"cte2" -> ((table("cte1").select(star()), Seq.empty))))
101101
intercept(
102102
"with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
103-
"Found duplicate keys 'cte1'")
103+
"CTE definition can't have duplicate names: 'cte1'.")
104104
}
105105

106106
test("simple select query") {

sql/core/src/test/resources/sql-tests/inputs/cte.sql

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,24 @@ FROM CTE1 t1
3131
WITH t(x) AS (SELECT 1)
3232
SELECT * FROM t WHERE x = 1;
3333

34+
-- CTE with multiple column aliases
35+
WITH t(x, y) AS (SELECT 1, 2)
36+
SELECT * FROM t WHERE x = 1 AND y = 2;
37+
38+
-- CTE with duplicate column aliases
39+
WITH t(x, x) AS (SELECT 1, 2)
40+
SELECT * FROM t;
41+
42+
-- CTE with empty column alias list is not allowed
43+
WITH t() AS (SELECT 1)
44+
SELECT * FROM t;
45+
46+
-- CTEs with duplicate names are not allowed
47+
WITH
48+
t(x) AS (SELECT 1),
49+
t(x) AS (SELECT 2)
50+
SELECT * FROM t;
51+
3452
-- CTE in CTE definition
3553
WITH t as (
3654
WITH t2 AS (SELECT 1)

sql/core/src/test/resources/sql-tests/results/cte.sql.out

Lines changed: 96 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 23
2+
-- Number of queries: 27
33

44

55
-- !query 0
@@ -108,54 +108,108 @@ struct<x:int>
108108

109109

110110
-- !query 9
111+
WITH t(x, y) AS (SELECT 1, 2)
112+
SELECT * FROM t WHERE x = 1 AND y = 2
113+
-- !query 9 schema
114+
struct<x:int,y:int>
115+
-- !query 9 output
116+
1 2
117+
118+
119+
-- !query 10
120+
WITH t(x, x) AS (SELECT 1, 2)
121+
SELECT * FROM t
122+
-- !query 10 schema
123+
struct<x:int,x:int>
124+
-- !query 10 output
125+
1 2
126+
127+
128+
-- !query 11
129+
WITH t() AS (SELECT 1)
130+
SELECT * FROM t
131+
-- !query 11 schema
132+
struct<>
133+
-- !query 11 output
134+
org.apache.spark.sql.catalyst.parser.ParseException
135+
136+
no viable alternative at input 'WITH t()'(line 1, pos 7)
137+
138+
== SQL ==
139+
WITH t() AS (SELECT 1)
140+
-------^^^
141+
SELECT * FROM t
142+
143+
144+
-- !query 12
145+
WITH
146+
t(x) AS (SELECT 1),
147+
t(x) AS (SELECT 2)
148+
SELECT * FROM t
149+
-- !query 12 schema
150+
struct<>
151+
-- !query 12 output
152+
org.apache.spark.sql.catalyst.parser.ParseException
153+
154+
CTE definition can't have duplicate names: 't'.(line 1, pos 0)
155+
156+
== SQL ==
157+
WITH
158+
^^^
159+
t(x) AS (SELECT 1),
160+
t(x) AS (SELECT 2)
161+
SELECT * FROM t
162+
163+
164+
-- !query 13
111165
WITH t as (
112166
WITH t2 AS (SELECT 1)
113167
SELECT * FROM t2
114168
)
115169
SELECT * FROM t
116-
-- !query 9 schema
170+
-- !query 13 schema
117171
struct<1:int>
118-
-- !query 9 output
172+
-- !query 13 output
119173
1
120174

121175

122-
-- !query 10
176+
-- !query 14
123177
SELECT max(c) FROM (
124178
WITH t(c) AS (SELECT 1)
125179
SELECT * FROM t
126180
)
127-
-- !query 10 schema
181+
-- !query 14 schema
128182
struct<max(c):int>
129-
-- !query 10 output
183+
-- !query 14 output
130184
1
131185

132186

133-
-- !query 11
187+
-- !query 15
134188
SELECT (
135189
WITH t AS (SELECT 1)
136190
SELECT * FROM t
137191
)
138-
-- !query 11 schema
192+
-- !query 15 schema
139193
struct<scalarsubquery():int>
140-
-- !query 11 output
194+
-- !query 15 output
141195
1
142196

143197

144-
-- !query 12
198+
-- !query 16
145199
WITH
146200
t AS (SELECT 1),
147201
t2 AS (
148202
WITH t AS (SELECT 2)
149203
SELECT * FROM t
150204
)
151205
SELECT * FROM t2
152-
-- !query 12 schema
206+
-- !query 16 schema
153207
struct<1:int>
154-
-- !query 12 output
208+
-- !query 16 output
155209
1
156210

157211

158-
-- !query 13
212+
-- !query 17
159213
WITH
160214
t(c) AS (SELECT 1),
161215
t2 AS (
@@ -167,13 +221,13 @@ WITH
167221
)
168222
)
169223
SELECT * FROM t2
170-
-- !query 13 schema
224+
-- !query 17 schema
171225
struct<scalarsubquery():int>
172-
-- !query 13 output
226+
-- !query 17 output
173227
1
174228

175229

176-
-- !query 14
230+
-- !query 18
177231
WITH
178232
t AS (SELECT 1),
179233
t2 AS (
@@ -185,39 +239,39 @@ WITH
185239
SELECT * FROM t2
186240
)
187241
SELECT * FROM t2
188-
-- !query 14 schema
242+
-- !query 18 schema
189243
struct<2:int>
190-
-- !query 14 output
244+
-- !query 18 output
191245
2
192246

193247

194-
-- !query 15
248+
-- !query 19
195249
WITH t(c) AS (SELECT 1)
196250
SELECT max(c) FROM (
197251
WITH t(c) AS (SELECT 2)
198252
SELECT * FROM t
199253
)
200-
-- !query 15 schema
254+
-- !query 19 schema
201255
struct<max(c):int>
202-
-- !query 15 output
256+
-- !query 19 output
203257
2
204258

205259

206-
-- !query 16
260+
-- !query 20
207261
WITH t(c) AS (SELECT 1)
208262
SELECT sum(c) FROM (
209263
SELECT max(c) AS c FROM (
210264
WITH t(c) AS (SELECT 2)
211265
SELECT * FROM t
212266
)
213267
)
214-
-- !query 16 schema
268+
-- !query 20 schema
215269
struct<sum(c):bigint>
216-
-- !query 16 output
270+
-- !query 20 output
217271
2
218272

219273

220-
-- !query 17
274+
-- !query 21
221275
WITH t(c) AS (SELECT 1)
222276
SELECT sum(c) FROM (
223277
WITH t(c) AS (SELECT 2)
@@ -226,39 +280,39 @@ SELECT sum(c) FROM (
226280
SELECT * FROM t
227281
)
228282
)
229-
-- !query 17 schema
283+
-- !query 21 schema
230284
struct<sum(c):bigint>
231-
-- !query 17 output
285+
-- !query 21 output
232286
3
233287

234288

235-
-- !query 18
289+
-- !query 22
236290
WITH t AS (SELECT 1)
237291
SELECT (
238292
WITH t AS (SELECT 2)
239293
SELECT * FROM t
240294
)
241-
-- !query 18 schema
295+
-- !query 22 schema
242296
struct<scalarsubquery():int>
243-
-- !query 18 output
297+
-- !query 22 output
244298
1
245299

246300

247-
-- !query 19
301+
-- !query 23
248302
WITH t AS (SELECT 1)
249303
SELECT (
250304
SELECT (
251305
WITH t AS (SELECT 2)
252306
SELECT * FROM t
253307
)
254308
)
255-
-- !query 19 schema
309+
-- !query 23 schema
256310
struct<scalarsubquery():int>
257-
-- !query 19 output
311+
-- !query 23 output
258312
1
259313

260314

261-
-- !query 20
315+
-- !query 24
262316
WITH t AS (SELECT 1)
263317
SELECT (
264318
WITH t AS (SELECT 2)
@@ -267,23 +321,23 @@ SELECT (
267321
SELECT * FROM t
268322
)
269323
)
270-
-- !query 20 schema
324+
-- !query 24 schema
271325
struct<scalarsubquery():int>
272-
-- !query 20 output
326+
-- !query 24 output
273327
1
274328

275329

276-
-- !query 21
330+
-- !query 25
277331
DROP VIEW IF EXISTS t
278-
-- !query 21 schema
332+
-- !query 25 schema
279333
struct<>
280-
-- !query 21 output
334+
-- !query 25 output
281335

282336

283337

284-
-- !query 22
338+
-- !query 26
285339
DROP VIEW IF EXISTS t2
286-
-- !query 22 schema
340+
-- !query 26 schema
287341
struct<>
288-
-- !query 22 output
342+
-- !query 26 output
289343

0 commit comments

Comments
 (0)