Skip to content

Commit 65b466c

Browse files
vladimirg-dbcloud-fan
authored and committed
[SPARK-54031][SQL] Add new golden file tests for analysis edge-cases
### What changes were proposed in this pull request? Add new golden file tests for analysis edge-cases discovered during Analyzer support and development. ### Why are the changes needed? Harden Spark testing coverage. ### Does this PR introduce _any_ user-facing change? Test-only change. ### How was this patch tested? Adding new golden files. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #52734 from vladimirg-db/vladimir-golubev_data/new-golden-files-for-analyzer-edge-cases-2. Authored-by: Vladimir Golubev <vladimir.golubev@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 7ac2b54 commit 65b466c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+2728
-3
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
CREATE TABLE t1(col1 INT, col2 STRING)
4+
-- !query analysis
5+
CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false
6+
7+
8+
-- !query
9+
CREATE TABLE t2(col1 STRUCT<a: STRING>, a STRING)
10+
-- !query analysis
11+
CreateDataSourceTableCommand `spark_catalog`.`default`.`t2`, false
12+
13+
14+
-- !query
15+
SELECT LEN(LOWER('X')) AS a, 1 AS b, b AS c GROUP BY LOWER('X') ORDER BY LOWER('X')
16+
-- !query analysis
17+
Project [a#x, b#x, c#x]
18+
+- Sort [lower(X)#x ASC NULLS FIRST], true
19+
+- Project [len(lower(X)#x) AS a#x, b#x, b#x AS c#x, lower(X)#x AS lower(X)#x]
20+
+- Project [lower(X)#x, 1 AS b#x]
21+
+- Aggregate [lower(X)], [lower(X) AS lower(X)#x]
22+
+- OneRowRelation
23+
24+
25+
-- !query
26+
SELECT LEN(LOWER('X')) AS a, 1 AS b, b AS c GROUP BY LOWER('X') HAVING LOWER('X') = 'x'
27+
-- !query analysis
28+
Project [a#x, b#x, c#x]
29+
+- Filter (lower(X)#x = x)
30+
+- Project [len(lower(X)#x) AS a#x, b#x, b#x AS c#x, lower(X)#x AS lower(X)#x]
31+
+- Project [lower(X)#x, 1 AS b#x]
32+
+- Aggregate [lower(X)], [lower(X) AS lower(X)#x]
33+
+- OneRowRelation
34+
35+
36+
-- !query
37+
SELECT col1.field, field FROM VALUES(named_struct('field', 1))
38+
-- !query analysis
39+
Project [field#x, field#x]
40+
+- Project [col1#x, col1#x.field AS field#x]
41+
+- LocalRelation [col1#x]
42+
43+
44+
-- !query
45+
SELECT col1.field, field FROM VALUES(map('field', 1))
46+
-- !query analysis
47+
Project [field#x, field#x]
48+
+- Project [col1#x, col1#x[field] AS field#x]
49+
+- LocalRelation [col1#x]
50+
51+
52+
-- !query
53+
SELECT COUNT(col1) as alias, SUM(col1) + alias FROM t1 GROUP BY ALL
54+
-- !query analysis
55+
Project [alias#xL, (sum(col1)#xL + alias#xL) AS (sum(col1) + lateralAliasReference(alias))#xL]
56+
+- Project [count(col1)#xL, sum(col1)#xL, count(col1)#xL AS alias#xL]
57+
+- Aggregate [count(col1#x) AS count(col1)#xL, sum(col1#x) AS sum(col1)#xL]
58+
+- SubqueryAlias spark_catalog.default.t1
59+
+- Relation spark_catalog.default.t1[col1#x,col2#x] parquet
60+
61+
62+
-- !query
63+
SELECT COUNT(col1) as alias, SUM(col1) + alias, SUM(col1) + col1 FROM t1 GROUP BY ALL
64+
-- !query analysis
65+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
66+
{
67+
"errorClass" : "UNRESOLVED_ALL_IN_GROUP_BY",
68+
"sqlState" : "42803",
69+
"queryContext" : [ {
70+
"objectType" : "",
71+
"objectName" : "",
72+
"startIndex" : 74,
73+
"stopIndex" : 85,
74+
"fragment" : "GROUP BY ALL"
75+
} ]
76+
}
77+
78+
79+
-- !query
80+
DROP TABLE t1
81+
-- !query analysis
82+
DropTable false, false
83+
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1
84+
85+
86+
-- !query
87+
DROP TABLE t2
88+
-- !query analysis
89+
DropTable false, false
90+
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2

sql/core/src/test/resources/sql-tests/analyzer-results/collations-basic.sql.out

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,14 @@ Project [try_validate_utf8(collate(utf8_binary#x, utf8_lcase_rtrim)) AS try_vali
11361136
+- Relation spark_catalog.default.t1[s#x,utf8_binary#x,utf8_lcase#x] parquet
11371137

11381138

1139+
-- !query
1140+
SELECT CASE WHEN utf8_lcase = 'XX' THEN 'XX' ELSE utf8_lcase END FROM t1
1141+
-- !query analysis
1142+
Project [CASE WHEN (utf8_lcase#x = XX) THEN XX ELSE utf8_lcase#x END AS CASE WHEN (utf8_lcase = 'XX' collate UTF8_LCASE) THEN 'XX' collate UTF8_LCASE ELSE utf8_lcase END#x]
1143+
+- SubqueryAlias spark_catalog.default.t1
1144+
+- Relation spark_catalog.default.t1[s#x,utf8_binary#x,utf8_lcase#x] parquet
1145+
1146+
11391147
-- !query
11401148
drop table t1
11411149
-- !query analysis

sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ CreateViewCommand `t3`, select * from t, false, false, LocalTempView, UNSUPPORTE
3030
+- LocalRelation [id#x]
3131

3232

33+
-- !query
34+
create table t4(col1 TIMESTAMP)
35+
-- !query analysis
36+
CreateDataSourceTableCommand `spark_catalog`.`default`.`t4`, false
37+
38+
3339
-- !query
3440
WITH s AS (SELECT 1 FROM s) SELECT * FROM s
3541
-- !query analysis
@@ -1031,6 +1037,94 @@ WithCTE
10311037
+- CTERelationRef xxxx, true, [1#x], false, false, 1
10321038

10331039

1040+
-- !query
1041+
SELECT * FROM (
1042+
WITH cte1 AS (SELECT * FROM t4) SELECT t4.col1 FROM t4 JOIN cte1 USING (col1)
1043+
)
1044+
-- !query analysis
1045+
Project [col1#x]
1046+
+- SubqueryAlias __auto_generated_subquery_name
1047+
+- WithCTE
1048+
:- CTERelationDef xxxx, false
1049+
: +- SubqueryAlias cte1
1050+
: +- Project [col1#x]
1051+
: +- SubqueryAlias spark_catalog.default.t4
1052+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1053+
+- Project [col1#x]
1054+
+- Project [col1#x]
1055+
+- Join Inner, (col1#x = col1#x)
1056+
:- SubqueryAlias spark_catalog.default.t4
1057+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1058+
+- SubqueryAlias cte1
1059+
+- CTERelationRef xxxx, true, [col1#x], false, false
1060+
1061+
1062+
-- !query
1063+
SELECT * FROM (
1064+
WITH cte1 AS (SELECT * FROM t4) SELECT cte1.col1 FROM t4 JOIN cte1 USING (col1)
1065+
)
1066+
-- !query analysis
1067+
Project [col1#x]
1068+
+- SubqueryAlias __auto_generated_subquery_name
1069+
+- WithCTE
1070+
:- CTERelationDef xxxx, false
1071+
: +- SubqueryAlias cte1
1072+
: +- Project [col1#x]
1073+
: +- SubqueryAlias spark_catalog.default.t4
1074+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1075+
+- Project [col1#x]
1076+
+- Project [col1#x, col1#x]
1077+
+- Join Inner, (col1#x = col1#x)
1078+
:- SubqueryAlias spark_catalog.default.t4
1079+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1080+
+- SubqueryAlias cte1
1081+
+- CTERelationRef xxxx, true, [col1#x], false, false
1082+
1083+
1084+
-- !query
1085+
SELECT * FROM (
1086+
WITH cte1 AS (SELECT * FROM t4) SELECT t4.col1 FROM cte1 JOIN t4 USING (col1)
1087+
)
1088+
-- !query analysis
1089+
Project [col1#x]
1090+
+- SubqueryAlias __auto_generated_subquery_name
1091+
+- WithCTE
1092+
:- CTERelationDef xxxx, false
1093+
: +- SubqueryAlias cte1
1094+
: +- Project [col1#x]
1095+
: +- SubqueryAlias spark_catalog.default.t4
1096+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1097+
+- Project [col1#x]
1098+
+- Project [col1#x, col1#x]
1099+
+- Join Inner, (col1#x = col1#x)
1100+
:- SubqueryAlias cte1
1101+
: +- CTERelationRef xxxx, true, [col1#x], false, false
1102+
+- SubqueryAlias spark_catalog.default.t4
1103+
+- Relation spark_catalog.default.t4[col1#x] parquet
1104+
1105+
1106+
-- !query
1107+
SELECT * FROM (
1108+
WITH cte1 AS (SELECT * FROM t4) SELECT cte1.col1 FROM cte1 JOIN t4 USING (col1)
1109+
)
1110+
-- !query analysis
1111+
Project [col1#x]
1112+
+- SubqueryAlias __auto_generated_subquery_name
1113+
+- WithCTE
1114+
:- CTERelationDef xxxx, false
1115+
: +- SubqueryAlias cte1
1116+
: +- Project [col1#x]
1117+
: +- SubqueryAlias spark_catalog.default.t4
1118+
: +- Relation spark_catalog.default.t4[col1#x] parquet
1119+
+- Project [col1#x]
1120+
+- Project [col1#x]
1121+
+- Join Inner, (col1#x = col1#x)
1122+
:- SubqueryAlias cte1
1123+
: +- CTERelationRef xxxx, true, [col1#x], false, false
1124+
+- SubqueryAlias spark_catalog.default.t4
1125+
+- Relation spark_catalog.default.t4[col1#x] parquet
1126+
1127+
10341128
-- !query
10351129
DROP VIEW IF EXISTS t
10361130
-- !query analysis
@@ -1047,3 +1141,10 @@ DropTempViewCommand t2
10471141
DROP VIEW IF EXISTS t3
10481142
-- !query analysis
10491143
DropTempViewCommand t3
1144+
1145+
1146+
-- !query
1147+
DROP TABLE IF EXISTS t4
1148+
-- !query analysis
1149+
DropTable true, false
1150+
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t4
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
CREATE TABLE t1(col1 STRUCT<a: STRING>, a STRING)
4+
-- !query analysis
5+
CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false
6+
7+
8+
-- !query
9+
SELECT col1.a, a FROM t1 ORDER BY a
10+
-- !query analysis
11+
Sort [a#x ASC NULLS FIRST], true
12+
+- Project [col1#x.a AS a#x, a#x]
13+
+- SubqueryAlias spark_catalog.default.t1
14+
+- Relation spark_catalog.default.t1[col1#x,a#x] parquet
15+
16+
17+
-- !query
18+
SELECT col1.a, a FROM t1 ORDER BY col1.a
19+
-- !query analysis
20+
Project [a#x, a#x]
21+
+- Sort [col1#x.a ASC NULLS FIRST], true
22+
+- Project [col1#x.a AS a#x, a#x, col1#x]
23+
+- SubqueryAlias spark_catalog.default.t1
24+
+- Relation spark_catalog.default.t1[col1#x,a#x] parquet
25+
26+
27+
-- !query
28+
SELECT split(col1, '-')[1] AS a FROM VALUES('a-b') ORDER BY split(col1, '-')[1]
29+
-- !query analysis
30+
Project [a#x]
31+
+- Sort [split(col1#x, -, -1)[1] ASC NULLS FIRST], true
32+
+- Project [split(col1#x, -, -1)[1] AS a#x, col1#x]
33+
+- LocalRelation [col1#x]
34+
35+
36+
-- !query
37+
DROP TABLE t1
38+
-- !query analysis
39+
DropTable false, false
40+
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1

sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,3 +543,24 @@ Aggregate [a#x], [1 AS a#x]
543543
+- Project [a#x, b#x]
544544
+- SubqueryAlias testData
545545
+- LocalRelation [a#x, b#x]
546+
547+
548+
-- !query
549+
SELECT col1 AS a FROM VALUES (NAMED_STRUCT('f1', 1)) GROUP BY a.f1
550+
-- !query analysis
551+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
552+
{
553+
"errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
554+
"sqlState" : "42703",
555+
"messageParameters" : {
556+
"objectName" : "`a`.`f1`",
557+
"proposal" : "`col1`"
558+
},
559+
"queryContext" : [ {
560+
"objectType" : "",
561+
"objectName" : "",
562+
"startIndex" : 63,
563+
"stopIndex" : 66,
564+
"fragment" : "a.f1"
565+
} ]
566+
}

0 commit comments

Comments
 (0)