diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 4debf3da0b819..1223ce05c7db6 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -2688,11 +2688,6 @@
     ],
     "sqlState" : "0A000"
   },
-  "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY" : {
-    "message" : [
-      "The direct query on files does not support the data source type: <className>. Please try a different data source type or consider using a different query method."
-    ]
-  },
   "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : {
     "message" : [
       "The <format> datasource doesn't support the column <columnName> of the type <columnType>."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 00fe6d75f538f..d2ed06fe3069a 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1884,12 +1884,6 @@ Unsupported data source type for direct query on files: `<dataSourceType>`
 
 Unsupported data type `<typeName>`.
 
-### UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY
-
-SQLSTATE: none assigned
-
-The direct query on files does not support the data source type: `<className>`. Please try a different data source type or consider using a different query method.
-
 ### UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE
 
 SQLSTATE: none assigned
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 68b34644a0e0e..7b69c86af5227 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1690,12 +1690,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
         "tableSchema" -> tableSchema.toString))
   }
 
-  def unsupportedDataSourceTypeForDirectQueryOnFilesError(className: String): Throwable = {
-    new AnalysisException(
-      errorClass = "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY",
-      messageParameters = Map("className" -> className))
-  }
-
   def saveDataIntoViewNotAllowedError(): Throwable = {
     new AnalysisException(
       errorClass = "_LEGACY_ERROR_TEMP_1158",
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 3f235e10c8171..4cbd54e6d209c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -45,15 +45,17 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
     conf.runSQLonFile && u.multipartIdentifier.size == 2
   }
 
-  private def resolveDataSource(ident: Seq[String]): DataSource = {
+  private def resolveDataSource(unresolved: UnresolvedRelation): DataSource = {
+    val ident = unresolved.multipartIdentifier
     val dataSource = DataSource(sparkSession, paths = Seq(ident.last), className = ident.head)
     // `dataSource.providingClass` may throw ClassNotFoundException, the caller side will try-catch
     // it and return the original plan, so that the analyzer can report table not found later.
     val isFileFormat = classOf[FileFormat].isAssignableFrom(dataSource.providingClass)
     if (!isFileFormat ||
         dataSource.className.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
-      throw QueryCompilationErrors.unsupportedDataSourceTypeForDirectQueryOnFilesError(
-        dataSource.className)
+      unresolved.failAnalysis(
+        errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
+        messageParameters = Map("dataSourceType" -> ident.head))
     }
     dataSource
   }
@@ -65,7 +67,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
       // fail to time travel. Otherwise, this is some other catalog table that isn't resolved yet,
       // so we should leave it be for now.
       try {
-        resolveDataSource(u.multipartIdentifier)
+        resolveDataSource(u)
         throw QueryCompilationErrors.timeTravelUnsupportedError(toSQLId(u.multipartIdentifier))
       } catch {
         case _: ClassNotFoundException => r
@@ -73,11 +75,11 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
 
     case u: UnresolvedRelation if maybeSQLFile(u) =>
       try {
-        val ds = resolveDataSource(u.multipartIdentifier)
+        val ds = resolveDataSource(u)
         LogicalRelation(ds.resolveRelation())
       } catch {
         case _: ClassNotFoundException => u
-        case e: Exception =>
+        case e: Exception if !e.isInstanceOf[AnalysisException] =>
           // the provider is valid, but failed to create a logical plan
           u.failAnalysis(
             errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 7ad27f05a58f0..0e08fed12ba72 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1647,18 +1647,16 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       exception = intercept[AnalysisException] {
         sql("select * from json.invalid_file")
       },
-      errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
-      parameters = Map("dataSourceType" -> "json"),
-      context = ExpectedContext("json.invalid_file", 14, 30)
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> "file:/.*invalid_file"),
+      matchPVals = true
     )
 
     checkError(
       exception = intercept[AnalysisException] {
         sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`")
       },
-      errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
-      parameters = Map("dataSourceType" -> "org.apache.spark.sql.hive.orc"),
-      context = ExpectedContext("`org.apache.spark.sql.hive.orc`.`file_path`", 15, 57)
+      errorClass = "_LEGACY_ERROR_TEMP_1138"
     )
 
     e = intercept[AnalysisException] {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index e93576761c87e..9308d1eda146f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1354,6 +1354,17 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
     })
   }
 
+  test("SPARK-44520: invalid path for direct query on files should throw the correct exception") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select id from parquet.`invalid_path`")
+      },
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> "file.*invalid_path"),
+      matchPVals = true
+    )
+  }
+
   test("run sql directly on files - orc") {
     val df = spark.range(100).toDF()
     withTempPath(f => {
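
A quick way to sanity-check the behavior change outside the test suites is a small standalone app. The sketch below is illustrative and not part of the patch: `DirectQueryErrorDemo`, the app name, and the `/tmp/does_not_exist` path are made-up, and it assumes a local build that includes this change. It exercises the same scenario as the new hive test: a direct query on a valid `FileFormat` provider (`parquet`) with a nonexistent path, which should now fail analysis with `PATH_NOT_FOUND` rather than the removed `UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY` class.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object DirectQueryErrorDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("direct-query-error-demo") // illustrative name
      // Direct queries on files must be enabled (this is the default).
      .config("spark.sql.runSQLOnFiles", "true")
      .getOrCreate()

    try {
      // Analysis runs eagerly inside spark.sql(), so the missing path is
      // detected here, before any action is executed.
      spark.sql("SELECT id FROM parquet.`/tmp/does_not_exist`")
    } catch {
      case e: AnalysisException =>
        // With this patch the expected error class is PATH_NOT_FOUND,
        // matching the updated checkError expectations in the test suites.
        println(s"errorClass=${e.getErrorClass}")
        println(s"message=${e.getMessage}")
    } finally {
      spark.stop()
    }
  }
}
```

Note the `case e: Exception if !e.isInstanceOf[AnalysisException]` guard in `rules.scala` is what lets the underlying `PATH_NOT_FOUND` `AnalysisException` propagate instead of being swallowed and rewrapped as `UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY`.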