Skip to content

Commit 34def69

Browse files
Add more unit test and comments
1 parent c695760 commit 34def69

8 files changed

+103
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,28 +193,34 @@ class Analyzer(
193193
}
194194

195195
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
196+
case a if !a.childrenResolved => a // be sure all of the children are resolved.
196197
case a: Cube =>
197198
GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations)
198199
case a: Rollup =>
199200
GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations)
200201
case x: GroupingSets =>
201202
val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)()
202203

204+
// aliases created for the complex (non-NamedExpression) expressions in the GROUP BY keys
203205
val nonAttributeGroupByExpression = new ArrayBuffer[Alias]()
204206
val groupByExprPairs = x.groupByExprs.map(_ match {
205207
case e: NamedExpression => (e, e)
206208
case other => {
207209
val alias = Alias(other, other.toString)()
208210
nonAttributeGroupByExpression += alias
209-
(other, alias.toAttribute)
211+
(other, alias.toAttribute) // (original complex expression, attribute of its alias)
210212
}
211213
})
212214

215+
// in the aggregations, replace each GROUP BY expression with its associated attribute
213216
val aggregation = x.aggregations.map(expr => expr.transformDown {
214217
case e => groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e)
215218
}.asInstanceOf[NamedExpression])
216219

220+
// use the substituted attributes as the new GROUP BY expressions
217221
val newGroupByExprs = groupByExprPairs.map(_._2)
222+
223+
// add an additional projection if the GROUP BY keys contain any complex expressions
218224
val child = if (nonAttributeGroupByExpression.length > 0) {
219225
Project(x.child.output ++ nonAttributeGroupByExpression, x.child)
220226
} else {

sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup-0-a78e3dbf242f240249e36b3d3fd0926a renamed to sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #1-0-63b61fb3f0e74226001ad279be440864

File renamed without changes.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 NULL -3 2
2+
1 NULL -1 2
3+
1 NULL 3 2
4+
1 NULL 4 2
5+
1 NULL 5 2
6+
1 NULL 6 2
7+
1 NULL 12 2
8+
1 NULL 14 2
9+
1 NULL 15 2
10+
1 NULL 22 2
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 NULL -3 2
2+
1 NULL -1 2
3+
1 NULL 3 2
4+
1 NULL 4 2
5+
1 NULL 5 2
6+
1 NULL 6 2
7+
1 NULL 12 2
8+
1 NULL 14 2
9+
1 NULL 15 2
10+
1 NULL 22 2
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
500 NULL 0
2+
91 0 1
3+
84 1 1
4+
105 2 1
5+
113 3 1
6+
107 4 1
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 0 5 3
2+
1 0 15 3
3+
1 0 25 3
4+
1 0 60 3
5+
1 0 75 3
6+
1 0 80 3
7+
1 0 100 3
8+
1 0 140 3
9+
1 0 145 3
10+
1 0 150 3
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 0 5 3
2+
1 0 15 3
3+
1 0 25 3
4+
1 0 60 3
5+
1 0 75 3
6+
1 0 80 3
7+
1 0 100 3
8+
1 0 140 3
9+
1 0 145 3
10+
1 0 150 3

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,60 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
8585
}
8686
}
8787

88-
createQueryTest("SPARK-8976 Wrong Result for Rollup",
88+
createQueryTest("SPARK-8976 Wrong Result for Rollup #1",
8989
"""
9090
SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH ROLLUP
9191
""".stripMargin)
9292

93+
createQueryTest("SPARK-8976 Wrong Result for Rollup #2",
94+
"""
95+
SELECT
96+
count(*) AS cnt,
97+
key % 5 as k1,
98+
key-5 as k2,
99+
GROUPING__ID as k3
100+
FROM src group by key%5, key-5
101+
WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
102+
""".stripMargin)
103+
104+
createQueryTest("SPARK-8976 Wrong Result for Rollup #3",
105+
"""
106+
SELECT
107+
count(*) AS cnt,
108+
key % 5 as k1,
109+
key-5 as k2,
110+
GROUPING__ID as k3
111+
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
112+
WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
113+
""".stripMargin)
114+
115+
createQueryTest("SPARK-8976 Wrong Result for CUBE #1",
116+
"""
117+
SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH CUBE
118+
""".stripMargin)
119+
120+
createQueryTest("SPARK-8976 Wrong Result for CUBE #2",
121+
"""
122+
SELECT
123+
count(*) AS cnt,
124+
key % 5 as k1,
125+
key-5 as k2,
126+
GROUPING__ID as k3
127+
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
128+
WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10
129+
""".stripMargin)
130+
131+
createQueryTest("SPARK-8976 Wrong Result for GroupingSet",
132+
"""
133+
SELECT
134+
count(*) AS cnt,
135+
key % 5 as k1,
136+
key-5 as k2,
137+
GROUPING__ID as k3
138+
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
139+
GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10
140+
""".stripMargin)
141+
93142
createQueryTest("insert table with generator with column name",
94143
"""
95144
| CREATE TABLE gen_tmp (key Int);

0 commit comments

Comments
 (0)