From bcd2271cba1c43fedca816242088d67b951e5d69 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Mon, 8 Jan 2024 16:52:34 +0800 Subject: [PATCH 1/3] improve error msg --- common/utils/src/main/resources/error/error-classes.json | 2 +- docs/sql-error-conditions.md | 2 +- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 7 +++++++ .../apache/spark/sql/errors/QueryExecutionErrors.scala | 8 -------- .../spark/sql/execution/datasources/DataSource.scala | 2 +- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 2 +- .../spark/sql/sources/ResolvedDataSourceSuite.scala | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 1812e9d764505..be441547b4b80 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -889,7 +889,7 @@ }, "DATA_SOURCE_NOT_FOUND" : { "message" : [ - "Failed to find the data source: <provider>. Please find packages at `https://spark.apache.org/third-party-projects.html`." + "Failed to find the data source: <provider>. Make sure the provider name is correct and the package is properly registered and compatible with your Spark version." ], "sqlState" : "42K02" }, diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index f58b7f607a0ba..9ff29a34dbdb8 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -490,7 +490,7 @@ Data source '`<provider>`' not found. Please make sure the data source is regist [SQLSTATE: 42K02](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) -Failed to find the data source: `<provider>`. Please find packages at `https://spark.apache.org/third-party-projects.html`. +Failed to find the data source: `<provider>`. Make sure the provider name is correct and the package is properly registered and compatible with your Spark version. 
### DATA_SOURCE_TABLE_SCHEMA_MISMATCH diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 3870646957704..ee48780347e0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1636,6 +1636,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("provider" -> provider)) } + def dataSourceNotFoundError(provider: String, error: Throwable): Throwable = { + new AnalysisException( + errorClass = "DATA_SOURCE_NOT_FOUND", + messageParameters = Map("provider" -> provider), + cause = Some(error)) + } + def findMultipleDataSourceError(provider: String, sourceNames: Seq[String]): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1141", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index a3e905090bf3c..867d2e01ff45f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -702,14 +702,6 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE messageParameters = Map("paths" -> allPaths.mkString(", "))) } - def dataSourceNotFoundError( - provider: String, error: Throwable): SparkClassNotFoundException = { - new SparkClassNotFoundException( - errorClass = "DATA_SOURCE_NOT_FOUND", - messageParameters = Map("provider" -> provider), - cause = error) - } - def removedClassInSpark2Error(className: String, e: Throwable): SparkClassNotFoundException = { new SparkClassNotFoundException( errorClass = "_LEGACY_ERROR_TEMP_2052", diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index decc20c52531c..ccbb267795830 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -663,7 +663,7 @@ object DataSource extends Logging { } else if (isUserDefinedDataSource) { classOf[PythonTableProvider] } else { - throw QueryExecutionErrors.dataSourceNotFoundError(provider1, error) + throw QueryCompilationErrors.dataSourceNotFoundError(provider1, error) } } } catch { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 2f045b35804d9..c32aa3cc706f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2233,7 +2233,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { val table2 = catalog.getTableMetadata(TableIdentifier("t2")) assert(table2.provider == Some("hive")) - val e1 = intercept[SparkClassNotFoundException] { + val e1 = intercept[AnalysisException] { sql("CREATE TABLE t3 LIKE s USING unknown") } checkError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala index 6067efc1d1c1c..a49e52d18254d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala @@ -101,7 +101,7 @@ class ResolvedDataSourceSuite extends SharedSparkSession { } test("error message for unknown data sources") { - val error = intercept[SparkClassNotFoundException] { + val error 
= intercept[AnalysisException] { getProvidingClass("asfdwefasdfasdf") } checkError( From f44246dcffc30be84993b75008c8f5ae92dbc8e1 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Mon, 8 Jan 2024 20:27:21 +0800 Subject: [PATCH 2/3] fix style --- .../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 2 +- .../org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index c32aa3cc706f5..bc07babe6e35c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import org.apache.hadoop.fs.{Path, RawLocalFileSystem} import org.apache.hadoop.fs.permission.{AclEntry, AclStatus} -import org.apache.spark.{SparkClassNotFoundException, SparkException, SparkFiles, SparkRuntimeException} +import org.apache.spark.{SparkException, SparkFiles, SparkRuntimeException} import org.apache.spark.internal.config import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala index a49e52d18254d..b2b4349df1b0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.sources import java.time.ZoneId -import org.apache.spark.SparkClassNotFoundException import org.apache.spark.sql.AnalysisException import 
org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.DataSource From f287582cb6cb7b5bf47b28eafc609f405e3cbfc2 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Wed, 17 Jan 2024 10:21:38 -0800 Subject: [PATCH 3/3] fix --- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 7 ------- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 8 ++++++++ .../spark/sql/execution/datasources/DataSource.scala | 2 +- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 4 ++-- .../spark/sql/sources/ResolvedDataSourceSuite.scala | 3 ++- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index ee48780347e0f..3870646957704 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1636,13 +1636,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("provider" -> provider)) } - def dataSourceNotFoundError(provider: String, error: Throwable): Throwable = { - new AnalysisException( - errorClass = "DATA_SOURCE_NOT_FOUND", - messageParameters = Map("provider" -> provider), - cause = Some(error)) - } - def findMultipleDataSourceError(provider: String, sourceNames: Seq[String]): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1141", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 867d2e01ff45f..a3e905090bf3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -702,6 
+702,14 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE messageParameters = Map("paths" -> allPaths.mkString(", "))) } + def dataSourceNotFoundError( + provider: String, error: Throwable): SparkClassNotFoundException = { + new SparkClassNotFoundException( + errorClass = "DATA_SOURCE_NOT_FOUND", + messageParameters = Map("provider" -> provider), + cause = error) + } + def removedClassInSpark2Error(className: String, e: Throwable): SparkClassNotFoundException = { new SparkClassNotFoundException( errorClass = "_LEGACY_ERROR_TEMP_2052", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index ccbb267795830..decc20c52531c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -663,7 +663,7 @@ object DataSource extends Logging { } else if (isUserDefinedDataSource) { classOf[PythonTableProvider] } else { - throw QueryCompilationErrors.dataSourceNotFoundError(provider1, error) + throw QueryExecutionErrors.dataSourceNotFoundError(provider1, error) } } } catch { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index bc07babe6e35c..2f045b35804d9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import org.apache.hadoop.fs.{Path, RawLocalFileSystem} import org.apache.hadoop.fs.permission.{AclEntry, AclStatus} -import org.apache.spark.{SparkException, SparkFiles, SparkRuntimeException} +import org.apache.spark.{SparkClassNotFoundException, SparkException, SparkFiles, SparkRuntimeException} import 
org.apache.spark.internal.config import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} @@ -2233,7 +2233,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { val table2 = catalog.getTableMetadata(TableIdentifier("t2")) assert(table2.provider == Some("hive")) - val e1 = intercept[AnalysisException] { + val e1 = intercept[SparkClassNotFoundException] { sql("CREATE TABLE t3 LIKE s USING unknown") } checkError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala index b2b4349df1b0e..6067efc1d1c1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.sources import java.time.ZoneId +import org.apache.spark.SparkClassNotFoundException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -100,7 +101,7 @@ class ResolvedDataSourceSuite extends SharedSparkSession { } test("error message for unknown data sources") { - val error = intercept[AnalysisException] { + val error = intercept[SparkClassNotFoundException] { getProvidingClass("asfdwefasdfasdf") } checkError(