Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
Original file line number Diff line number Diff line change
Expand Up @@ -2688,11 +2688,6 @@
],
"sqlState" : "0A000"
},
"UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY" : {
"message" : [
"The direct query on files does not support the data source type: <className>. Please try a different data source type or consider using a different query method."
]
},
"UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : {
"message" : [
"The <format> datasource doesn't support the column <columnName> of the type <columnType>."
Expand Down
6 changes: 0 additions & 6 deletions docs/sql-error-conditions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1884,12 +1884,6 @@ Unsupported data source type for direct query on files: `<dataSourceType>`

Unsupported data type `<typeName>`.

### UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY

SQLSTATE: none assigned

The direct query on files does not support the data source type: `<className>`. Please try a different data source type or consider using a different query method.

### UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE

SQLSTATE: none assigned
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1690,12 +1690,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
"tableSchema" -> tableSchema.toString))
}

/**
 * Creates the analysis error reported when a direct query on files names a data source
 * type that is not supported.
 *
 * @param className the data source type, substituted into the `className` message parameter
 * @return an `AnalysisException` with error class `UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY`
 */
def unsupportedDataSourceTypeForDirectQueryOnFilesError(className: String): Throwable = {
  val params = Map("className" -> className)
  new AnalysisException(
    errorClass = "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY",
    messageParameters = params)
}

def saveDataIntoViewNotAllowedError(): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1158",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,17 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
conf.runSQLonFile && u.multipartIdentifier.size == 2
}

private def resolveDataSource(ident: Seq[String]): DataSource = {
private def resolveDataSource(unresolved: UnresolvedRelation): DataSource = {
val ident = unresolved.multipartIdentifier
val dataSource = DataSource(sparkSession, paths = Seq(ident.last), className = ident.head)
// `dataSource.providingClass` may throw ClassNotFoundException. The caller catches it and
// returns the original plan, so that the analyzer can report "table not found" later.
val isFileFormat = classOf[FileFormat].isAssignableFrom(dataSource.providingClass)
if (!isFileFormat ||
dataSource.className.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw QueryCompilationErrors.unsupportedDataSourceTypeForDirectQueryOnFilesError(
dataSource.className)
unresolved.failAnalysis(
errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
messageParameters = Map("dataSourceType" -> ident.head))
}
dataSource
}
Expand All @@ -65,19 +67,19 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
// fail to time travel. Otherwise, this is some other catalog table that isn't resolved yet,
// so we should leave it be for now.
try {
resolveDataSource(u.multipartIdentifier)
resolveDataSource(u)
throw QueryCompilationErrors.timeTravelUnsupportedError(toSQLId(u.multipartIdentifier))
} catch {
case _: ClassNotFoundException => r
}

case u: UnresolvedRelation if maybeSQLFile(u) =>
try {
val ds = resolveDataSource(u.multipartIdentifier)
val ds = resolveDataSource(u)
LogicalRelation(ds.resolveRelation())
} catch {
case _: ClassNotFoundException => u
case e: Exception =>
case e: Exception if !e.isInstanceOf[AnalysisException] =>
// the provider is valid, but failed to create a logical plan
u.failAnalysis(
errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
Expand Down
10 changes: 4 additions & 6 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1647,18 +1647,16 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
exception = intercept[AnalysisException] {
sql("select * from json.invalid_file")
},
errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
parameters = Map("dataSourceType" -> "json"),
context = ExpectedContext("json.invalid_file", 14, 30)
errorClass = "PATH_NOT_FOUND",
parameters = Map("path" -> "file:/.*invalid_file"),
matchPVals = true
)

checkError(
exception = intercept[AnalysisException] {
sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`")
},
errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
parameters = Map("dataSourceType" -> "org.apache.spark.sql.hive.orc"),
context = ExpectedContext("`org.apache.spark.sql.hive.orc`.`file_path`", 15, 57)
errorClass = "_LEGACY_ERROR_TEMP_1138"
)

e = intercept[AnalysisException] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,17 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
})
}

test("SPARK-44520: invalid path for support direct query shall throw correct exception") {
  // Run a direct query against the parquet source with the path `invalid_path` and capture
  // the analysis failure before checking it.
  val thrown = intercept[AnalysisException] {
    sql(s"select id from parquet.`invalid_path`")
  }
  // The failure must be reported as PATH_NOT_FOUND. The expected `path` value is a pattern
  // (presumably a regex, since `matchPVals` is enabled) rather than an exact string.
  checkError(
    exception = thrown,
    errorClass = "PATH_NOT_FOUND",
    parameters = Map("path" -> "file.*invalid_path"),
    matchPVals = true
  )
}

test("run sql directly on files - orc") {
val df = spark.range(100).toDF()
withTempPath(f => {
Expand Down