From 26897558919e079cb811785fa0f779d6e5800f17 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 8 Aug 2016 14:40:53 -0700 Subject: [PATCH] [SPARK-16955][SQL] Using ordinals in ORDER BY and GROUP BY causes an analysis error --- .../spark/sql/catalyst/analysis/Analyzer.scala | 9 ++++++++- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 14a2a323c885..f39ea0350063 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -730,6 +730,11 @@ class Analyzer( } Sort(newOrders, global, child) + // Eliminate the useless position numbers + case s @ Sort(orders, global, child) + if !conf.orderByOrdinal && orders.exists(o => IntegerIndex.unapply(o.child).nonEmpty) => + Sort(orders.filterNot(o => IntegerIndex.unapply(o.child).nonEmpty), global, child) + // Replace the index with the corresponding expression in aggregateExpressions. The index is // a 1-base position of aggregateExpressions, which is output columns (select expression) case a @ Aggregate(groups, aggs, child) @@ -1252,7 +1257,9 @@ class Analyzer( case ae: AnalysisException => filter } - case sort @ Sort(sortOrder, global, aggregate: Aggregate) if aggregate.resolved => + // If there exist ordinal sort orders, it's not resolved completely yet. See SPARK-16955. + case sort @ Sort(sortOrder, global, aggregate: Aggregate) if aggregate.resolved && + sortOrder.forall(x => IntegerIndex.unapply(x.child).isEmpty) => // Try resolving the ordering as though it is in the aggregate clause. 
try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index eac588fff2fc..93b3fb111590 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -495,6 +495,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } + test("SPARK-16955: Using ordinals in ORDER BY and GROUP BY causes an analysis error") { + withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "true") { + checkAnswer( + sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY 1 ORDER BY 1 DESC"), + sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY a ORDER BY a DESC")) + } + withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") { + checkAnswer( + sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY 1 ORDER BY 1 DESC"), + sql("SELECT a, avg(a) FROM (SELECT * FROM VALUES 1,2,3 T(a)) U GROUP BY a")) + } + } + test("select *") { checkAnswer( sql("SELECT * FROM testData"),