From f59e9dfae943bf9a93e1b035f60f1b2c22173048 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 08:22:20 +0200 Subject: [PATCH 1/6] Fix backticks for column candidates in error message --- .../expressions/namedExpressions.scala | 2 +- .../analysis/AnalysisErrorSuite.scala | 18 +++++++++++++ .../sql/catalyst/analysis/TestRelations.scala | 2 ++ .../org/apache/spark/sql/DatasetSuite.scala | 27 +++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 4181edcb8c60..7d3bd22264b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -75,7 +75,7 @@ trait NamedExpression extends Expression { * multiple qualifiers, it is possible that there are other possible way to refer to this * attribute. */ - def qualifiedName: String = (qualifier :+ name).mkString(".") + def qualifiedName: String = (qualifier :+ name).map(quoteIfNeeded).mkString(".") /** * Optional qualifier for the expression. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ac823183ce9d..bd23afd873c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -876,6 +876,24 @@ class AnalysisErrorSuite extends AnalysisTest { "[`a`, `b`, `c`, `d`, `e`]" :: Nil) + errorTest( + "SPARK-39783: backticks in error message for candidate column with dots", + // This selects a column that does not exist, + // the error message suggest the existing column with correct backticks + testRelation6.select($"`the`.`id`"), + "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `the`.`id` " + + "cannot be resolved. Did you mean one of the following? [`the.id`]" + :: Nil) + + errorTest( + "SPARK-39783: backticks in error message for candidate struct column", + // This selects a column that does not exist, + // the error message suggest the existing column with correct backticks + nestedRelation2.select($"`top.aField`"), + "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `top.aField` " + + "cannot be resolved. Did you mean one of the following? [`top`]" + :: Nil) + test("SPARK-35080: Unsupported correlated equality predicates in subquery") { val a = AttributeReference("a", IntegerType)() val b = AttributeReference("b", IntegerType)() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala index 33b602907093..d54237fcc140 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala @@ -46,6 +46,8 @@ object TestRelations { val testRelation5 = LocalRelation(AttributeReference("i", StringType)()) + val testRelation6 = LocalRelation(AttributeReference("the.id", LongType)()) + val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 591d2b943590..aea2b4949118 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1992,6 +1992,33 @@ class DatasetSuite extends QueryTest } } + test("SPARK-39783: Fix error messages for columns with dots/periods") { + forAll(dotColumnTestModes) { (caseSensitive, colName) => + val ds = Seq(SpecialCharClass("1", "2")).toDS + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { + val errorMsg = intercept[AnalysisException] { + // Note: ds(colName) has different semantics than ds.select(colName) + ds.select(colName) + } + assert(errorMsg.getMessage.contains( + s"A column or function parameter with name `${colName.replace(".", "`.`")}` " + + s"cannot be resolved. Did you mean one of the following? [`field.1`, `field 2`]")) + } + } + } + + test("SPARK-39783: backticks in error message for candidate column with dots") { + checkError( + exception = intercept[AnalysisException] { + Seq(0).toDF("the.id").select("the.id") + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> "`the`.`id`", + "proposal" -> "`the.id`")) + } + test("groupBy.as") { val df1 = Seq(DoubleData(1, "one"), DoubleData(2, "two"), DoubleData(3, "three")).toDS() .repartition($"id").sortWithinPartitions("id") From 1d8a919064a3f53eb6f99c97e1e93b7543d73d8f Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 14:53:52 +0200 Subject: [PATCH 2/6] Adjust expected proposal to correct one --- .../test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala index f2f31851acba..3c05a7415a12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala @@ -504,10 +504,9 @@ class DatasetUnpivotSuite extends QueryTest checkError( exception = e, errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - // expected message is wrong: https://issues.apache.org/jira/browse/SPARK-39783 parameters = Map( "objectName" -> "`an`.`id`", - "proposal" -> "`an`.`id`, `int1`, `long1`, `str`.`one`, `str`.`two`")) + "proposal" -> "`an.id`, `int1`, `long1`, `str.one`, `str.two`")) } test("unpivot with struct fields") { From dc8070644e8aaa9437cb0890940d639f6b379ff8 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 19:36:06 +0200 Subject: [PATCH 3/6] Use checkError and errorClassTest --- .../catalyst/analysis/AnalysisErrorSuite.scala | 18 ++++++++++-------- .../org/apache/spark/sql/DatasetSuite.scala | 18 +++++++++++------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index bd23afd873c6..88de52aec8f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -876,23 +876,25 @@ class AnalysisErrorSuite extends AnalysisTest { "[`a`, `b`, `c`, `d`, `e`]" :: Nil) - errorTest( + errorClassTest( "SPARK-39783: backticks in error message for candidate column with dots", // This selects a column that does not exist, // the error message suggest the existing column with correct backticks testRelation6.select($"`the`.`id`"), - "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `the`.`id` " + - "cannot be resolved. Did you mean one of the following? [`the.id`]" - :: Nil) + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters = Map( + "objectName" -> "`the`.`id`", + "proposal" -> "`the.id`")) - errorTest( + errorClassTest( "SPARK-39783: backticks in error message for candidate struct column", // This selects a column that does not exist, // the error message suggest the existing column with correct backticks nestedRelation2.select($"`top.aField`"), - "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `top.aField` " + - "cannot be resolved. Did you mean one of the following? [`top`]" - :: Nil) + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters = Map( + "objectName" -> "`top.aField`", + "proposal" -> "`top`")) test("SPARK-35080: Unsupported correlated equality predicates in subquery") { val a = AttributeReference("a", IntegerType)() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index aea2b4949118..c0f2704cd6e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1996,13 +1996,17 @@ class DatasetSuite extends QueryTest forAll(dotColumnTestModes) { (caseSensitive, colName) => val ds = Seq(SpecialCharClass("1", "2")).toDS withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { - val errorMsg = intercept[AnalysisException] { - // Note: ds(colName) has different semantics than ds.select(colName) - ds.select(colName) - } - assert(errorMsg.getMessage.contains( - s"A column or function parameter with name `${colName.replace(".", "`.`")}` " + - s"cannot be resolved. Did you mean one of the following? [`field.1`, `field 2`]")) + checkError( + exception = intercept[AnalysisException] { + // Note: ds(colName) "SPARK-25153: Improve error messages for columns with dots/periods" + // has different semantics than ds.select(colName) + ds.select(colName) + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> s"`${colName.replace(".", "`.`")}`", + "proposal" -> "`field.1`, `field 2`")) } } } From cef2711be03d6e3696f0a1c952922916049be160 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 20:16:53 +0200 Subject: [PATCH 4/6] Make ResolveUserSpecifiedColumns use qualifiedName for candidates --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 097307bce3de..962b6b88f56d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3479,7 +3479,7 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols.map { col => i.table.resolve(Seq(col), resolver).getOrElse { - val candidates = i.table.output.map(_.name) + val candidates = i.table.output.map(_.qualifiedName) val orderedCandidates = StringUtils.orderStringsBySimilarity(col, candidates) throw QueryCompilationErrors .unresolvedAttributeError("UNRESOLVED_COLUMN", col, orderedCandidates, i.origin) From f919303c941dccd432db0f7251461e4996cd4e7f Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 20:52:03 +0200 Subject: [PATCH 5/6] Test for UNRESOLVED_MAP_KEY.WITH_SUGGESTION --- .../org/apache/spark/sql/DatasetSuite.scala | 14 ++++++++++++++ .../sql/errors/QueryCompilationErrorsSuite.scala | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index c0f2704cd6e8..260cc9396ab9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2023,6 +2023,20 @@ class DatasetSuite extends QueryTest "proposal" -> "`the.id`")) } + test("SPARK-39783: backticks in error message for map candidate key with dots") { + checkError( + exception = intercept[AnalysisException] { + spark.range(1) + .select(map(lit("key"), lit(1)).as("map"), lit(2).as("other.column")) + .select($"`map`"($"nonexisting")).show() + }, + errorClass = "UNRESOLVED_MAP_KEY.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> "`nonexisting`", + "proposal" -> "`map`, `other.column`")) + } + test("groupBy.as") { val df1 = Seq(DoubleData(1, "one"), DoubleData(2, "two"), DoubleData(3, "three")).toDS() .repartition($"id").sortWithinPartitions("id") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 75c157d4b885..8086a0e97ec2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -419,6 +419,22 @@ class QueryCompilationErrorsSuite ) } + test("UNRESOLVED_MAP_KEY: proposal columns containing quoted dots") { + val query = "select m[a] from (select map('a', 'b') as m, 'aa' as `a.a`)" + checkError( + exception = intercept[AnalysisException] {sql(query)}, + errorClass = "UNRESOLVED_MAP_KEY.WITH_SUGGESTION", + sqlState = None, + parameters = Map("objectName" -> "`a`", + "proposal" -> + "`__auto_generated_subquery_name`.`m`, `__auto_generated_subquery_name`.`a.a`"), + context = ExpectedContext( + fragment = "a", + start = 9, + stop = 9) + ) + } + test("UNRESOLVED_COLUMN: SELECT distinct does not work correctly " + "if order by missing attribute") { checkAnswer( From b9c990794016d9453ff60822f2fe70cf369123fe Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 14 Oct 2022 20:59:41 +0200 Subject: [PATCH 6/6] Update documentation of qualifiedName --- .../spark/sql/catalyst/expressions/namedExpressions.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 7d3bd22264b2..99e5f411bdb6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -71,9 +71,9 @@ trait NamedExpression extends Expression { def exprId: ExprId /** - * Returns a dot separated fully qualified name for this attribute. Given that there can be - * multiple qualifiers, it is possible that there are other possible way to refer to this - * attribute. + * Returns a dot separated fully qualified name for this attribute. If the name or any qualifier + * contains `dots`, it is quoted to avoid confusion. Given that there can be multiple qualifiers, + * it is possible that there are other possible way to refer to this attribute. */ def qualifiedName: String = (qualifier :+ name).map(quoteIfNeeded).mkString(".")