From 6235db4e1d11c942c28161a3f615dbfc6de8a2e7 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Fri, 6 Feb 2015 13:28:46 -0800
Subject: [PATCH 1/5] [SQL] Add an exception for analysis errors.

---
 .../spark/sql/catalyst/analysis/Analyzer.scala | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0b59ed173956..85bfd5bbb48e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -32,6 +32,11 @@ import org.apache.spark.sql.types.IntegerType
  */
 object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true)
 
+/**
+ * Thrown when a query fails to analyze, usually because the query itself is invalid.
+ */
+class AnalysisException(message: String) extends Exception(message) with Serializable
+
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
  * [[UnresolvedRelation]]s into fully typed objects using information in a schema [[Catalog]] and
@@ -81,16 +86,18 @@ class Analyzer(catalog: Catalog,
    */
   object CheckResolution extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = {
-      plan.transform {
+      plan.transformUp {
         case p if p.expressions.exists(!_.resolved) =>
-          throw new TreeNodeException(p,
-            s"Unresolved attributes: ${p.expressions.filterNot(_.resolved).mkString(",")}")
+          val missing = p.expressions.filterNot(_.resolved).map(_.prettyString).mkString(",")
+          val from = p.inputSet.map(_.name).mkString("{", ", ", "}")
+
+          throw new AnalysisException(s"Cannot resolve '$missing' given input columns $from")
         case p if !p.resolved && p.childrenResolved =>
-          throw new TreeNodeException(p, "Unresolved plan found")
+          throw new AnalysisException(s"Unresolved operator in the query plan ${p.simpleString}")
       } match {
         // As a backstop, use the root node to check that the entire plan tree is resolved.
         case p if !p.resolved =>
-          throw new TreeNodeException(p, "Unresolved plan in tree")
+          throw new AnalysisException(s"Unresolved operator in the query plan ${p.simpleString}")
         case p => p
       }
     }

From fbf4bc3a0383d238cc0e0142750b06fba088550c Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Fri, 6 Feb 2015 14:30:15 -0800
Subject: [PATCH 2/5] move to sql

---
 .../apache/spark/sql/AnalysisException.scala  | 23 +++++++++++++++++++
 .../sql/catalyst/analysis/Analyzer.scala      |  6 +----
 2 files changed, 24 insertions(+), 5 deletions(-)
 create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
new file mode 100644
index 000000000000..a9ba1d99d7d9
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+/**
+ * Thrown when a query fails to analyze, usually because the query itself is invalid.
+ */
+class AnalysisException(message: String) extends Exception(message) with Serializable
\ No newline at end of file
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 85bfd5bbb48e..71c5f2976e8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.catalyst.analysis
 
 import org.apache.spark.util.collection.OpenHashSet
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
@@ -32,11 +33,6 @@ import org.apache.spark.sql.types.IntegerType
  */
 object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true)
 
-/**
- * Thrown when a query fails to analyze, usually because the query itself is invalid.
- */
-class AnalysisException(message: String) extends Exception(message) with Serializable
-
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
  * [[UnresolvedRelation]]s into fully typed objects using information in a schema [[Catalog]] and

From fede90a01bc5d0d11b1288070a42c4eb396091be Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Fri, 6 Feb 2015 14:31:02 -0800
Subject: [PATCH 3/5] newline

---
 .../src/main/scala/org/apache/spark/sql/AnalysisException.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index a9ba1d99d7d9..871d560b9d54 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -20,4 +20,4 @@ package org.apache.spark.sql
 /**
  * Thrown when a query fails to analyze, usually because the query itself is invalid.
  */
-class AnalysisException(message: String) extends Exception(message) with Serializable
\ No newline at end of file
+class AnalysisException(message: String) extends Exception(message) with Serializable

From f88079f0734152e116849bda95405da05986a503 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Sun, 8 Feb 2015 16:20:08 -0800
Subject: [PATCH 4/5] update more cases

---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 71c5f2976e8e..7d1f319e721f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -318,15 +318,17 @@ class Analyzer(catalog: Catalog,
         case StructType(fields) =>
           val actualField = fields.filter(f => resolver(f.name, fieldName))
           if (actualField.length == 0) {
-            sys.error(
+            throw new AnalysisException(
               s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}")
           } else if (actualField.length == 1) {
             val field = actualField(0)
             GetField(expr, field, fields.indexOf(field))
           } else {
-            sys.error(s"Ambiguous reference to fields ${actualField.mkString(", ")}")
+            throw new AnalysisException(
+              s"Ambiguous reference to fields ${actualField.mkString(", ")}")
           }
-        case otherType => sys.error(s"GetField is not valid on fields of type $otherType")
+        case otherType =>
+          throw new AnalysisException(s"GetField is not valid on columns of type $otherType")
       }
     }
   }

From 45862a0a99b1aecfddcfcead5c0cad4793f4a464 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Tue, 10 Feb 2015 16:05:16 -0800
Subject: [PATCH 5/5] fix hive test

---
 .../apache/spark/sql/hive/execution/HiveResolutionSuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index ff8130ae5f6b..ab5f9cdddf50 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.hive.execution
 
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.hive.test.TestHive.{sparkContext, jsonRDD, sql}
 import org.apache.spark.sql.hive.test.TestHive.implicits._
 
@@ -40,7 +41,7 @@ class HiveResolutionSuite extends HiveComparisonTest {
       """{"a": [{"b": 1, "B": 2}]}""" :: Nil)).registerTempTable("nested")
 
     // there are 2 filed matching field name "b", we should report Ambiguous reference error
-    val exception = intercept[RuntimeException] {
+    val exception = intercept[AnalysisException] {
       sql("SELECT a[0].b from nested").queryExecution.analyzed
     }
     assert(exception.getMessage.contains("Ambiguous reference to fields"))
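
The AnalysisException introduced by this series is thrown while a query plan is being
analyzed, before any execution starts, so callers can catch it and report the message
directly, as the updated Hive test does. Below is a minimal sketch of how user code might
observe the new error messages; the `sqlContext` value and the `people` table with its
columns are illustrative assumptions, not part of these patches.

    import org.apache.spark.sql.AnalysisException

    // Sketch only: assumes a SQLContext named `sqlContext` and a registered temp
    // table `people` that has no column named `missingColumn`.
    try {
      // Forces analysis of the plan without executing the query, mirroring the
      // `queryExecution.analyzed` call used in HiveResolutionSuite above.
      sqlContext.sql("SELECT missingColumn FROM people").queryExecution.analyzed
    } catch {
      case e: AnalysisException =>
        // e.g. "Cannot resolve 'missingColumn' given input columns {name, age}"
        println(s"Query failed to analyze: ${e.getMessage}")
    }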