From 981224bd9631a0ac633b6967e639197794894fcf Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Tue, 28 Feb 2017 13:40:47 +0000 Subject: [PATCH 01/13] SPARK-13947 Rewritten error message for clarity. Added extra information in case of attribute name collision --- .../sql/catalyst/analysis/CheckAnalysis.scala | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 36ab8b8527b4..54b438043f2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -338,11 +338,28 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => val missingAttributes = o.missingInput.mkString(",") - val input = o.inputSet.mkString(",") + val availableAttributes = o.inputSet.mkString(",") + val missingAttributesHelper = o.missingInput.map(a => (a.name, a.exprId.id)) + + val availableAttributesHelper = o.inputSet.map(a => (a.name, a.exprId.id)) + + val common = (missingAttributesHelper ++ availableAttributesHelper).groupBy(_._1) + val commonNames = common.map(x => (x._1, x._2.toSet)) + .filter(_._2.size > 1) + .map(_._1) + + val repeatedNameHint = if (commonNames.size > 0) { + s"""\nObserve that attribute(s) ${commonNames.mkString(",")} appear in your """ + + """query with at least two different hashes, but same name.\n""" + } else { + "" + } failAnalysis( - s"resolved attribute(s) $missingAttributes missing from $input " + - s"in operator ${operator.simpleString}") + s"""Some resolved attribute(s) are not present among available attributes for a query. + | $missingAttributes is not in $availableAttributes. $repeatedNameHint + The failed query was for operator + | ${operator.simpleString}""") case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( From ea07688ae1e09505fef58de834c35e08324cb83b Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Wed, 1 Mar 2017 18:00:46 +0000 Subject: [PATCH 02/13] SPARK-13947 Regenerated the golden file with the updated error message. Added a mental note to disable coursier when running the full Spark test suite --- .../subquery/negative-cases/invalid-correlation.sql.out | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 50ae01e181bc..d95059ad844c 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -63,4 +63,7 @@ where t1a in (select min(t2a) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; +Some resolved attribute(s) are not present among available attributes for a query. + | t2b#x is not in min(t2a)#x,t2c#x. + The failed query was for operator + | !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; From 65b9596c229ac2b62ecdfeb98e541d2ea92e078d Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Wed, 1 Mar 2017 22:26:48 +0000 Subject: [PATCH 03/13] SPARK-13947 Regenerated golden file after formatting fix. Fixed the AnalysisErrorSuite test that was failing. Just in case, removed the hardcoding of the names and hashes from this test --- .../sql/catalyst/analysis/CheckAnalysis.scala | 13 +++++++------ .../catalyst/analysis/AnalysisErrorSuite.scala | 18 ++++++++++++------ .../negative-cases/invalid-correlation.sql.out | 8 ++++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 54b438043f2d..c717920f68b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -349,17 +349,18 @@ trait CheckAnalysis extends PredicateHelper { .map(_._1) val repeatedNameHint = if (commonNames.size > 0) { - s"""\nObserve that attribute(s) ${commonNames.mkString(",")} appear in your """ + - """query with at least two different hashes, but same name.\n""" + s"\n|Observe that attribute(s) ${commonNames.mkString(",")} appear in your " + + "query with at least two different hashes, but same name." } else { "" } failAnalysis( - s"""Some resolved attribute(s) are not present among available attributes for a query. - | $missingAttributes is not in $availableAttributes. $repeatedNameHint - The failed query was for operator - | ${operator.simpleString}""") + s"|Some resolved attribute(s) are not present among available attributes " + + s"for a query.\n" + + s"| $missingAttributes is not in $availableAttributes. $repeatedNameHint\n" + + s"|The failed query was for operator\n" + + s"| ${operator.simpleString}") case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c5e877d12811..7109b3acc6c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -398,16 +398,22 @@ class AnalysisErrorSuite extends AnalysisTest { // CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s) // Since we manually construct the logical plan at here and Sum only accept // LongType, DoubleType, and DecimalType. We use LongType as the type of a. - val plan = - Aggregate( + val attrA = AttributeReference("a", LongType)(exprId = ExprId(1)) + val aId = Array[String](attrA.name, attrA.exprId.id.toString) + val otherA = AttributeReference("a", LongType)(exprId = ExprId(2)) + val otherAId = Array[String](otherA.name, otherA.exprId.id.toString) + val plan = Aggregate( Nil, - Alias(sum(AttributeReference("a", LongType)(exprId = ExprId(1))), "b")() :: Nil, - LocalRelation( - AttributeReference("a", LongType)(exprId = ExprId(2)))) + Alias(sum(attrA), "b")() :: Nil, + LocalRelation(otherA)) assert(plan.resolved) - assertAnalysisError(plan, "resolved attribute(s) a#1L missing from a#2L" :: Nil) + assertAnalysisError(plan, + "Some resolved attribute(s) are not present among available " + + "attributes for a query.\n" + + s"""| ${aId.mkString("#")}L is not """ + + s"""in ${otherAId.mkString("#")}L.""" :: Nil) } test("error test for self-join") { diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index d95059ad844c..1a825e12aed6 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -63,7 +63,7 @@ where t1a in (select min(t2a) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -Some resolved attribute(s) are not present among available attributes for a query. - | t2b#x is not in min(t2a)#x,t2c#x. - The failed query was for operator - | !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; +|Some resolved attribute(s) are not present among available attributes for a query. +| t2b#x is not in min(t2a)#x,t2c#x. +|The failed query was for operator +| !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; From c99229c101b04e1f9db6800b9d4f641a8f56b794 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Wed, 22 Mar 2017 00:19:09 +0100 Subject: [PATCH 04/13] SPARK-13947 Code review improvements --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 10 +++++----- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 11 ++++++----- .../negative-cases/invalid-correlation.sql.out | 9 +++++---- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c717920f68b6..7184d29200cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -356,11 +356,11 @@ trait CheckAnalysis extends PredicateHelper { } failAnalysis( - s"|Some resolved attribute(s) are not present among available attributes " + - s"for a query.\n" + - s"| $missingAttributes is not in $availableAttributes. $repeatedNameHint\n" + - s"|The failed query was for operator\n" + - s"| ${operator.simpleString}") + s"""Some resolved attribute(s) are not present among the available attributes + |for a query. + |$missingAttributes is not in $availableAttributes. $repeatedNameHint + |The failed query was for operator + | ${operator.simpleString}""".stripMargin) case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 7109b3acc6c2..83126a078a54 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -409,11 +409,12 @@ class AnalysisErrorSuite extends AnalysisTest { assert(plan.resolved) - assertAnalysisError(plan, - "Some resolved attribute(s) are not present among available " + - "attributes for a query.\n" + - s"""| ${aId.mkString("#")}L is not """ + - s"""in ${otherAId.mkString("#")}L.""" :: Nil) + val errorMsg = s"""Some resolved attribute(s) are not present among the available attributes + |for a query. + |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L.""" + + + assertAnalysisError(plan, errorMsg :: Nil) } test("error test for self-join") { diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 1a825e12aed6..2e0cedeb3468 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -63,7 +63,8 @@ where t1a in (select min(t2a) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -|Some resolved attribute(s) are not present among available attributes for a query. -| t2b#x is not in min(t2a)#x,t2c#x. -|The failed query was for operator -| !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; +Some resolved attribute(s) are not present among the available attributes +for a query. +t2b#x is not in min(t2a)#x,t2c#x. +The failed query was for operator + !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; From 4ac8143cf63f5b4777a66f236824671b0bb05933 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Wed, 3 May 2017 01:26:48 +0100 Subject: [PATCH 05/13] =?UTF-8?q?SPARK-13947=20Not=20sure=20about=20these?= =?UTF-8?q?=20changes,=20but=20passes=20the=20local=20suite=20just=20fine.?= =?UTF-8?q?=20Let=E2=80=99s=20see!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sql/catalyst/analysis/Analyzer.scala | 2 ++ .../sql/catalyst/analysis/CheckAnalysis.scala | 32 +++++++++---------- .../analysis/AnalysisErrorSuite.scala | 9 ++++-- .../invalid-correlation.sql.out | 4 +-- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 574f91b09912..ebe3b90e3b92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -99,6 +99,8 @@ class Analyzer( def resolver: Resolver = conf.resolver + def checkAnalysis(plan: LogicalPlan): Unit = checkAnalysisWithConf(plan, conf) + protected val fixedPoint = FixedPoint(maxIterations) /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 5165fa9dbb06..39935e0fb71b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -72,7 +73,9 @@ trait CheckAnalysis extends PredicateHelper { } } - def checkAnalysis(plan: LogicalPlan): Unit = { + def checkAnalysis(plan: LogicalPlan): Unit + + def checkAnalysisWithConf(plan: LogicalPlan, conf: SQLConf): Unit = { // We transform up and order the rules so as to catch the first possible failure instead // of the result of cascading resolution failures. plan.foreachUp { @@ -179,11 +182,11 @@ trait CheckAnalysis extends PredicateHelper { case fail => failAnalysis(s"Correlated scalar subqueries must be Aggregated: $fail") } } - checkAnalysis(query) + checkAnalysisWithConf(query, conf) s case s: SubqueryExpression => - checkAnalysis(s.plan) + checkAnalysisWithConf(s.plan, conf) s } @@ -336,20 +339,15 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => + val resolver = conf.resolver + val commonAttrs = o.missingInput.filter(x => + o.inputSet.exists(y => resolver(x.name, y.name))) val missingAttributes = o.missingInput.mkString(",") val availableAttributes = o.inputSet.mkString(",") - val missingAttributesHelper = o.missingInput.map(a => (a.name, a.exprId.id)) - - val availableAttributesHelper = o.inputSet.map(a => (a.name, a.exprId.id)) - - val common = (missingAttributesHelper ++ availableAttributesHelper).groupBy(_._1) - val commonNames = common.map(x => (x._1, x._2.toSet)) - .filter(_._2.size > 1) - .map(_._1) - - val repeatedNameHint = if (commonNames.size > 0) { - s"\n|Observe that attribute(s) ${commonNames.mkString(",")} appear in your " + - "query with at least two different hashes, but same name." + val repeatedNameHint = if (commonAttrs.size > 0) { + val commonNames = commonAttrs.map(_.name).mkString(",") + s"""\n|Observe that attribute(s) $commonNames appear in two + |different datasets, with the same name""" } else { "" } @@ -357,9 +355,9 @@ trait CheckAnalysis extends PredicateHelper { failAnalysis( s"""Some resolved attribute(s) are not present among the available attributes |for a query. - |$missingAttributes is not in $availableAttributes. $repeatedNameHint + |$missingAttributes is not in $availableAttributes.$repeatedNameHint |The failed query was for operator - | ${operator.simpleString}""".stripMargin) + |${operator.simpleString}""".stripMargin) case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 043c1e2268c4..097369b21972 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -402,16 +402,21 @@ class AnalysisErrorSuite extends AnalysisTest { val aId = Array[String](attrA.name, attrA.exprId.id.toString) val otherA = AttributeReference("a", LongType)(exprId = ExprId(2)) val otherAId = Array[String](otherA.name, otherA.exprId.id.toString) + val bAlias = Alias(sum(attrA), "b")() :: Nil val plan = Aggregate( Nil, - Alias(sum(attrA), "b")() :: Nil, + bAlias, LocalRelation(otherA)) assert(plan.resolved) val errorMsg = s"""Some resolved attribute(s) are not present among the available attributes |for a query. - |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L.""" + |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L. + |Observe that attribute(s) a appear in two + |different datasets, with the same name + |The failed query was for operator + |!Aggregate [${bAlias.mkString("#")}]""".stripMargin assertAnalysisError(plan, errorMsg :: Nil) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 1bf2ff05e512..48ca43a0dfe1 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -65,6 +65,6 @@ struct<> org.apache.spark.sql.AnalysisException Some resolved attribute(s) are not present among the available attributes for a query. -t2b#x is not in min(t2a)#x,t2c#x. +t2b#x is not in min(t2a)#x,t2c#x. The failed query was for operator - !Filter t2c#x IN (list#x [t2b#x]); +!Filter t2c#x IN (list#x [t2b#x]); From c2dfe11c4c5da3ec8fb0983377400aa999213c35 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Thu, 4 May 2017 00:42:40 +0100 Subject: [PATCH 06/13] SPARK-13947 Never quick-edit a non-clean merge after 1 AM --- .../results/subquery/negative-cases/invalid-correlation.sql.out | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index ecd2cb9a60fc..4bf0cd7032db 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -68,7 +68,6 @@ for a query. t2b#x is not in min(t2a)#x,t2c#x. The failed query was for operator !Filter t2c#x IN (list#x [t2b#x]); -resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]); -- !query 5 From 75be3900f65e64bd083a91e5c63978489a61ccfe Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Sun, 22 Oct 2017 15:21:49 +0100 Subject: [PATCH 07/13] SPARK-13947 Code review modifications. AnalysisErrorSuite passes, but I get out of memory locally for a full suite --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 -- .../sql/catalyst/analysis/CheckAnalysis.scala | 18 ++++++++---------- .../catalyst/analysis/AnalysisErrorSuite.scala | 4 ++-- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index fef48193b019..d6a962a14dc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -98,8 +98,6 @@ class Analyzer( def resolver: Resolver = conf.resolver - def checkAnalysis(plan: LogicalPlan): Unit = checkAnalysisWithConf(plan, conf) - protected val fixedPoint = FixedPoint(maxIterations) /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 39979d1ae315..aa9c8490a69b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -75,9 +75,7 @@ trait CheckAnalysis extends PredicateHelper { } } - def checkAnalysis(plan: LogicalPlan): Unit - - def checkAnalysisWithConf(plan: LogicalPlan, conf: SQLConf): Unit = { + def checkAnalysis(plan: LogicalPlan): Unit = { // We transform up and order the rules so as to catch the first possible failure instead // of the result of cascading resolution failures. plan.foreachUp { @@ -128,7 +126,7 @@ trait CheckAnalysis extends PredicateHelper { } case s: SubqueryExpression => - checkAnalysisWithConf(s.plan, conf) + checkSubqueryExpression(operator, s) s } @@ -273,15 +271,15 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => - val resolver = conf.resolver - val commonAttrs = o.missingInput.filter(x => + val resolver = plan.conf.resolver + val attrsWithSameName = o.missingInput.filter(x => o.inputSet.exists(y => resolver(x.name, y.name))) val missingAttributes = o.missingInput.mkString(",") val availableAttributes = o.inputSet.mkString(",") - val repeatedNameHint = if (commonAttrs.size > 0) { - val commonNames = commonAttrs.map(_.name).mkString(",") - s"""\n|Observe that attribute(s) $commonNames appear in two - |different datasets, with the same name""" + val repeatedNameHint = if (attrsWithSameName.size > 0) { + val commonNames = attrsWithSameName.map(_.name).mkString(",") + s"""\n|Attribute(s) `$commonNames` seem to appear in two + |different datasets, with the same name.""" } else { "" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index cd7a88538951..572288417262 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -423,8 +423,8 @@ class AnalysisErrorSuite extends AnalysisTest { val errorMsg = s"""Some resolved attribute(s) are not present among the available attributes |for a query. |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L. - |Observe that attribute(s) a appear in two - |different datasets, with the same name + |Attribute(s) `a` seem to appear in two + |different datasets, with the same name. |The failed query was for operator |!Aggregate [${bAlias.mkString("#")}]""".stripMargin From 6e8ab429a229df5cb075239c69fe90404734ef64 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Mon, 23 Oct 2017 08:07:18 +0100 Subject: [PATCH 08/13] SPARK-13947 Another batch of code review modifications --- .../sql/catalyst/analysis/CheckAnalysis.scala | 19 ++++++++++--------- .../analysis/AnalysisErrorSuite.scala | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index aa9c8490a69b..b9d45adf2c81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -272,22 +271,24 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => val resolver = plan.conf.resolver - val attrsWithSameName = o.missingInput.filter(x => - o.inputSet.exists(y => resolver(x.name, y.name))) - val missingAttributes = o.missingInput.mkString(",") - val availableAttributes = o.inputSet.mkString(",") - val repeatedNameHint = if (attrsWithSameName.size > 0) { + val attrsWithSameName = o.missingInput.filter(missing => + o.inputSet.exists(input => resolver(missing.name, input.name))) + val repeatedNameHint = if (attrsWithSameName.nonEmpty) { val commonNames = attrsWithSameName.map(_.name).mkString(",") - s"""\n|Attribute(s) `$commonNames` seem to appear in two - |different datasets, with the same name.""" + s"""|Please check attribute(s) `$commonNames`, they seem to appear in two + |different input operators, with the same name.""".stripMargin } else { "" } + val missingAttributes = o.missingInput.mkString(",") + val availableAttributes = o.inputSet.mkString(",") + failAnalysis( s"""Some resolved attribute(s) are not present among the available attributes |for a query. - |$missingAttributes is not in $availableAttributes.$repeatedNameHint + |$missingAttributes is not in $availableAttributes. + |$repeatedNameHint |The failed query was for operator |${operator.simpleString}""".stripMargin) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 572288417262..c50b105cda26 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -423,8 +423,8 @@ class AnalysisErrorSuite extends AnalysisTest { val errorMsg = s"""Some resolved attribute(s) are not present among the available attributes |for a query. |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L. - |Attribute(s) `a` seem to appear in two - |different datasets, with the same name. + |Please check attribute(s) `a`, they seem to appear in two + |different input operators, with the same name. |The failed query was for operator |!Aggregate [${bAlias.mkString("#")}]""".stripMargin From 72b72cfb0fecc169e0bc289f7f319bdb2b947de5 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Mon, 23 Oct 2017 12:04:33 +0100 Subject: [PATCH 09/13] SPARK-13947 Another batch of code review modifications --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 12 ++++-------- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 11 +++-------- .../negative-cases/invalid-correlation.sql.out | 7 ++----- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index b9d45adf2c81..1025a0eec51a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -275,22 +275,18 @@ trait CheckAnalysis extends PredicateHelper { o.inputSet.exists(input => resolver(missing.name, input.name))) val repeatedNameHint = if (attrsWithSameName.nonEmpty) { val commonNames = attrsWithSameName.map(_.name).mkString(",") - s"""|Please check attribute(s) `$commonNames`, they seem to appear in two + s"""\n|Please check attribute(s) `$commonNames`, they seem to appear in two |different input operators, with the same name.""".stripMargin } else { "" } val missingAttributes = o.missingInput.mkString(",") - val availableAttributes = o.inputSet.mkString(",") + val input = o.inputSet.mkString(",") failAnalysis( - s"""Some resolved attribute(s) are not present among the available attributes - |for a query. - |$missingAttributes is not in $availableAttributes. - |$repeatedNameHint - |The failed query was for operator - |${operator.simpleString}""".stripMargin) + s"""Resolved attribute(s) $missingAttributes missing from $input + |in operator ${operator.simpleString}.$repeatedNameHint""".stripMargin) case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c50b105cda26..5ec141d22268 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -409,9 +409,7 @@ class AnalysisErrorSuite extends AnalysisTest { // Since we manually construct the logical plan at here and Sum only accept // LongType, DoubleType, and DecimalType. We use LongType as the type of a. val attrA = AttributeReference("a", LongType)(exprId = ExprId(1)) - val aId = Array[String](attrA.name, attrA.exprId.id.toString) val otherA = AttributeReference("a", LongType)(exprId = ExprId(2)) - val otherAId = Array[String](otherA.name, otherA.exprId.id.toString) val bAlias = Alias(sum(attrA), "b")() :: Nil val plan = Aggregate( Nil, @@ -420,13 +418,10 @@ class AnalysisErrorSuite extends AnalysisTest { assert(plan.resolved) - val errorMsg = s"""Some resolved attribute(s) are not present among the available attributes - |for a query. - |${aId.mkString("#")}L is not in ${otherAId.mkString("#")}L. + val errorMsg = s"""Resolved attribute(s) ${attrA.toString} missing from ${otherA.toString} + |in operator !Aggregate [${bAlias.mkString("#")}]. |Please check attribute(s) `a`, they seem to appear in two - |different input operators, with the same name. - |The failed query was for operator - |!Aggregate [${bAlias.mkString("#")}]""".stripMargin + |different input operators, with the same name.""".stripMargin assertAnalysisError(plan, errorMsg :: Nil) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 4bf0cd7032db..ba8ff97dc5cb 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -63,11 +63,8 @@ WHERE t1a IN (SELECT min(t2a) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -Some resolved attribute(s) are not present among the available attributes -for a query. -t2b#x is not in min(t2a)#x,t2c#x. -The failed query was for operator -!Filter t2c#x IN (list#x [t2b#x]); +Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x +in operator !Filter t2c#x IN (list#x [t2b#x]).; -- !query 5 From 33a8a71092b699f6e402c67684fe4bfab6d2ccf8 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Mon, 23 Oct 2017 12:13:31 +0100 Subject: [PATCH 10/13] SPARK-13947 Format change for a lambda --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 1025a0eec51a..d87207ce0919 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -271,8 +271,9 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => val resolver = plan.conf.resolver - val attrsWithSameName = o.missingInput.filter(missing => - o.inputSet.exists(input => resolver(missing.name, input.name))) + val attrsWithSameName = o.missingInput.filter { missing => + o.inputSet.exists(input => resolver(missing.name, input.name)) + } val repeatedNameHint = if (attrsWithSameName.nonEmpty) { val commonNames = attrsWithSameName.map(_.name).mkString(",") s"""\n|Please check attribute(s) `$commonNames`, they seem to appear in two From e3cc4555b7a14299b93e8e4e0ae6d7f482582f9d Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Mon, 23 Oct 2017 12:20:17 +0100 Subject: [PATCH 11/13] SPARK-13947 Improve the final formatting --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d87207ce0919..a44b481c07df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -276,7 +276,7 @@ trait CheckAnalysis extends PredicateHelper { } val repeatedNameHint = if (attrsWithSameName.nonEmpty) { val commonNames = attrsWithSameName.map(_.name).mkString(",") - s"""\n|Please check attribute(s) `$commonNames`, they seem to appear in two + s"""|Please check attribute(s) `$commonNames`, they seem to appear in two |different input operators, with the same name.""".stripMargin } else { "" @@ -285,9 +285,10 @@ trait CheckAnalysis extends PredicateHelper { val missingAttributes = o.missingInput.mkString(",") val input = o.inputSet.mkString(",") - failAnalysis( - s"""Resolved attribute(s) $missingAttributes missing from $input - |in operator ${operator.simpleString}.$repeatedNameHint""".stripMargin) + val msg = s"""Resolved attribute(s) $missingAttributes missing from $input + |in operator ${operator.simpleString}.""".stripMargin + + failAnalysis(if (repeatedNameHint.nonEmpty) msg + "\n" + repeatedNameHint else msg) case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( From 34753b513323f5076edd4c5006983a9c9d3d97d7 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Mon, 23 Oct 2017 22:08:00 +0100 Subject: [PATCH 12/13] SPARK-13947 Test and message improvements --- .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../catalyst/analysis/AnalysisErrorSuite.scala | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a44b481c07df..a106abf82116 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -275,9 +275,9 @@ trait CheckAnalysis extends PredicateHelper { o.inputSet.exists(input => resolver(missing.name, input.name)) } val repeatedNameHint = if (attrsWithSameName.nonEmpty) { - val commonNames = attrsWithSameName.map(_.name).mkString(",") - s"""|Please check attribute(s) `$commonNames`, they seem to appear in two - |different input operators, with the same name.""".stripMargin + val sameNames = attrsWithSameName.map(_.name).mkString(",") + s"""Attribute(s) with the same name appear in the operation: `$sameNames`. + |Please check if the right attribute(s) are used.""".stripMargin } else { "" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 5ec141d22268..c47f640e6297 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -410,19 +410,21 @@ class AnalysisErrorSuite extends AnalysisTest { // LongType, DoubleType, and DecimalType. We use LongType as the type of a. val attrA = AttributeReference("a", LongType)(exprId = ExprId(1)) val otherA = AttributeReference("a", LongType)(exprId = ExprId(2)) - val bAlias = Alias(sum(attrA), "b")() :: Nil + val attrC = AttributeReference("c", LongType)(exprId = ExprId(3)) + val aliases = Alias(sum(attrA), "b")() :: Alias(sum(attrC), "d")() :: Nil val plan = Aggregate( - Nil, - bAlias, - LocalRelation(otherA)) + Nil, + aliases, + LocalRelation(otherA)) assert(plan.resolved) - val errorMsg = s"""Resolved attribute(s) ${attrA.toString} missing from ${otherA.toString} - |in operator !Aggregate [${bAlias.mkString("#")}]. - |Please check attribute(s) `a`, they seem to appear in two - |different input operators, with the same name.""".stripMargin + val resolved = s"${attrA.toString},${attrC.toString}" + val errorMsg = s"""Resolved attribute(s) $resolved missing from ${otherA.toString} + |in operator !Aggregate [${aliases.mkString(", ")}]. + |Attribute(s) with the same name appear in the operation: `a`. + |Please check if the right attribute(s) are used.""".stripMargin assertAnalysisError(plan, errorMsg :: Nil) } From b8fbdea3b35ea69e6dc74ea9c11e257ac88dc765 Mon Sep 17 00:00:00 2001 From: Ruben Berenguel Montoro Date: Tue, 24 Oct 2017 20:26:47 +0100 Subject: [PATCH 13/13] SPARK-13947 More code review changes --- .../sql/catalyst/analysis/CheckAnalysis.scala | 22 +++++++++---------- .../analysis/AnalysisErrorSuite.scala | 8 +++---- .../invalid-correlation.sql.out | 3 +-- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a106abf82116..b5e8bdd79869 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -270,25 +270,25 @@ trait CheckAnalysis extends PredicateHelper { operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => + val missingAttributes = o.missingInput.mkString(",") + val input = o.inputSet.mkString(",") + val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " + + s"from $input in operator ${operator.simpleString}." + val resolver = plan.conf.resolver val attrsWithSameName = o.missingInput.filter { missing => o.inputSet.exists(input => resolver(missing.name, input.name)) } - val repeatedNameHint = if (attrsWithSameName.nonEmpty) { + + val msg = if (attrsWithSameName.nonEmpty) { val sameNames = attrsWithSameName.map(_.name).mkString(",") - s"""Attribute(s) with the same name appear in the operation: `$sameNames`. - |Please check if the right attribute(s) are used.""".stripMargin + s"$msgForMissingAttributes Attribute(s) with the same name appear in the " + + s"operation: $sameNames. Please check if the right attribute(s) are used." } else { - "" + msgForMissingAttributes } - val missingAttributes = o.missingInput.mkString(",") - val input = o.inputSet.mkString(",") - - val msg = s"""Resolved attribute(s) $missingAttributes missing from $input - |in operator ${operator.simpleString}.""".stripMargin - - failAnalysis(if (repeatedNameHint.nonEmpty) msg + "\n" + repeatedNameHint else msg) + failAnalysis(msg) case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c47f640e6297..5d2f8e735e3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -421,10 +421,10 @@ class AnalysisErrorSuite extends AnalysisTest { val resolved = s"${attrA.toString},${attrC.toString}" - val errorMsg = s"""Resolved attribute(s) $resolved missing from ${otherA.toString} - |in operator !Aggregate [${aliases.mkString(", ")}]. - |Attribute(s) with the same name appear in the operation: `a`. - |Please check if the right attribute(s) are used.""".stripMargin + val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " + + s"in operator !Aggregate [${aliases.mkString(", ")}]. " + + s"Attribute(s) with the same name appear in the operation: a. " + + "Please check if the right attribute(s) are used." assertAnalysisError(plan, errorMsg :: Nil) } diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index ba8ff97dc5cb..2586f26f71c3 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -63,8 +63,7 @@ WHERE t1a IN (SELECT min(t2a) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x -in operator !Filter t2c#x IN (list#x [t2b#x]).; +Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).; -- !query 5