From ac78af14508ac02a74202de564253b927f06c237 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Wed, 14 Oct 2015 14:44:10 -0700 Subject: [PATCH 1/4] [SPARK-10534] ORDER BY clause allows only columns that are present in SELECT statement --- .../spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++++- .../sql/catalyst/analysis/AnalysisSuite.scala | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 041ab22827399..4cfb97a1d23f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -482,7 +482,12 @@ class Analyzer( val newOrdering = resolveSortOrders(ordering, grandchild, throws = true) // Construct a set that contains all of the attributes that we need to evaluate the // ordering. - val requiredAttributes = AttributeSet(newOrdering.filter(_.resolved)) + + val resolvedAttributes = + newOrdering.flatMap(_.collect {case a : AttributeReference if a.resolved => a}) + + val requiredAttributes = AttributeSet(resolvedAttributes) + // Figure out which ones are missing from the projection, so that we can add them and // remove them after the sort. val missingInProject = requiredAttributes -- child.output diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 820b336aac759..168d51bf5e2f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -135,4 +135,21 @@ class AnalysisSuite extends AnalysisTest { plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col")) checkAnalysis(plan, plan) } + + test("SPARK-10534: resolve attribute references in order by clause") { + + val a = testRelation2.output.head + val c = testRelation2.output.toArray.apply(2) + + val sortProjected = Floor(Cast(Floor(c), DoubleType)) + val projected = Alias(a, "a")() + val plan = testRelation2.select(a).orderBy(SortOrder(Floor(Floor(c)), Ascending)) + + val expected = + Project(Seq(a), + Sort(Seq(SortOrder(sortProjected, Ascending)), true, + Project(Seq(a, c), testRelation2))) + checkAnalysis(plan, expected) + + } } From 6f97fb7c9fce2920641e5b9dc63b2830e8037702 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 15 Oct 2015 01:11:02 -0700 Subject: [PATCH 2/4] Fix based on code review comments --- .../spark/sql/catalyst/analysis/Analyzer.scala | 5 +---- .../sql/catalyst/analysis/AnalysisSuite.scala | 15 ++++----------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4cfb97a1d23f6..ccdf806e27d73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -483,10 +483,7 @@ class Analyzer( // Construct a set that contains all of the attributes that we need to evaluate the // ordering. - val resolvedAttributes = - newOrdering.flatMap(_.collect {case a : AttributeReference if a.resolved => a}) - - val requiredAttributes = AttributeSet(resolvedAttributes) + val requiredAttributes = AttributeSet(newOrdering).filter(_.resolved) // Figure out which ones are missing from the projection, so that we can add them and // remove them after the sort. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 168d51bf5e2f7..f7a49cda98291 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -137,19 +137,12 @@ class AnalysisSuite extends AnalysisTest { } test("SPARK-10534: resolve attribute references in order by clause") { + val a = testRelation2.output(0) + val c = testRelation2.output(2) - val a = testRelation2.output.head - val c = testRelation2.output.toArray.apply(2) + val plan = testRelation2.select(c).orderBy(Floor(a).asc) + val expected = testRelation2.select(c, a).orderBy(Floor(a.cast(DoubleType)).asc).select(c) - val sortProjected = Floor(Cast(Floor(c), DoubleType)) - val projected = Alias(a, "a")() - val plan = testRelation2.select(a).orderBy(SortOrder(Floor(Floor(c)), Ascending)) - - val expected = - Project(Seq(a), - Sort(Seq(SortOrder(sortProjected, Ascending)), true, - Project(Seq(a, c), testRelation2))) checkAnalysis(plan, expected) - } } From 9ed26d16ee19c514f3d5281a1e34cc72661f111e Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 15 Oct 2015 11:05:30 -0700 Subject: [PATCH 3/4] Remove blank lines --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ccdf806e27d73..f3af6099fbe65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -482,9 +482,7 @@ class Analyzer( val newOrdering = resolveSortOrders(ordering, grandchild, throws = true) // Construct a set that contains all of the attributes that we need to evaluate the // ordering. - val requiredAttributes = AttributeSet(newOrdering).filter(_.resolved) - // Figure out which ones are missing from the projection, so that we can add them and // remove them after the sort. val missingInProject = requiredAttributes -- child.output From 4f162edae1201179817067a2fef1d469e45283a6 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 15 Oct 2015 11:19:06 -0700 Subject: [PATCH 4/4] Fix testcase based on review comment --- .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index f7a49cda98291..275295cb3f750 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -140,7 +140,7 @@ class AnalysisSuite extends AnalysisTest { val a = testRelation2.output(0) val c = testRelation2.output(2) - val plan = testRelation2.select(c).orderBy(Floor(a).asc) + val plan = testRelation2.select('c).orderBy(Floor('a).asc) val expected = testRelation2.select(c, a).orderBy(Floor(a.cast(DoubleType)).asc).select(c) checkAnalysis(plan, expected)