Skip to content

Commit 24561ca

Browse files
imback82cloud-fan
authored andcommitted
[SPARK-37444][SQL] ALTER NAMESPACE ... SET LOCATION should handle empty location consistently across v1 and v2 command
### What changes were proposed in this pull request? Currently, there is an inconsistency when handling an empty location for `ALTER NAMESPACE .. SET LOCATION` between v1 and v2 command. In v1 command, an empty string location will result in the `IllegalArgumentException` exception thrown whereas v2 uses the empty string as it is. This PR proposes to make the behavior consistent by following the v1 command behavior. ### Why are the changes needed? To make the behavior consistent and the reason for following v1 behavior is that "Spark should be responsible to qualify the user-specified path using its spark/hadoop configs, before passing the path to v2 sources": #34610 (comment) ### Does this PR introduce _any_ user-facing change? Yes, now the empty string location will result in the `IllegalArgumentException` exception thrown even for v2 catalogs. ### How was this patch tested? Added a new test Closes #34686 from imback82/empty_location_fix. Authored-by: Terry Kim <yuminkim@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 6c73cee commit 24561ca

File tree

4 files changed

+40
-11
lines changed

4 files changed

+40
-11
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.catalog
1919

2020
import java.net.URI
2121

22+
import org.apache.hadoop.conf.Configuration
2223
import org.apache.hadoop.fs.Path
2324
import org.apache.hadoop.util.Shell
2425

@@ -258,6 +259,24 @@ object CatalogUtils {
258259
new Path(str).toUri
259260
}
260261

262+
def makeQualifiedNamespacePath(
263+
locationUri: URI,
264+
warehousePath: String,
265+
hadoopConf: Configuration): URI = {
266+
if (locationUri.isAbsolute) {
267+
locationUri
268+
} else {
269+
val fullPath = new Path(warehousePath, CatalogUtils.URIToString(locationUri))
270+
makeQualifiedPath(fullPath.toUri, hadoopConf)
271+
}
272+
}
273+
274+
def makeQualifiedPath(path: URI, hadoopConf: Configuration): URI = {
275+
val hadoopPath = new Path(path)
276+
val fs = hadoopPath.getFileSystem(hadoopConf)
277+
fs.makeQualified(hadoopPath).toUri
278+
}
279+
261280
private def normalizeColumnName(
262281
tableName: String,
263282
tableCols: Seq[String],

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,7 @@ class SessionCatalog(
210210
* FileSystem is changed.
211211
*/
212212
private def makeQualifiedPath(path: URI): URI = {
213-
val hadoopPath = new Path(path)
214-
val fs = hadoopPath.getFileSystem(hadoopConf)
215-
fs.makeQualified(hadoopPath).toUri
213+
CatalogUtils.makeQualifiedPath(path, hadoopConf)
216214
}
217215

218216
private def requireDbExists(db: String): Unit = {
@@ -254,12 +252,7 @@ class SessionCatalog(
254252
}
255253

256254
private def makeQualifiedDBPath(locationUri: URI): URI = {
257-
if (locationUri.isAbsolute) {
258-
locationUri
259-
} else {
260-
val fullPath = new Path(conf.warehousePath, CatalogUtils.URIToString(locationUri))
261-
makeQualifiedPath(fullPath.toUri)
262-
}
255+
CatalogUtils.makeQualifiedNamespacePath(locationUri, conf.warehousePath, hadoopConf)
263256
}
264257

265258
def dropDatabase(db: String, ignoreIfNotExists: Boolean, cascade: Boolean): Unit = {

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import scala.collection.mutable
2222

2323
import org.apache.spark.sql.{SparkSession, Strategy}
2424
import org.apache.spark.sql.catalyst.analysis.{ResolvedDBObjectName, ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable}
25+
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
2526
import org.apache.spark.sql.catalyst.expressions
2627
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruning, EmptyRow, Expression, Literal, NamedExpression, PredicateHelper, SubqueryExpression}
2728
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
@@ -38,6 +39,7 @@ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors
3839
import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan}
3940
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, PushableColumn, PushableColumnBase}
4041
import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec}
42+
import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
4143
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
4244
import org.apache.spark.sql.types.{BooleanType, StringType}
4345
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -311,10 +313,13 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
311313
AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil
312314

313315
case SetNamespaceLocation(ResolvedNamespace(catalog, ns), location) =>
316+
val warehousePath = session.sharedState.conf.get(WAREHOUSE_PATH)
317+
val nsPath = CatalogUtils.makeQualifiedNamespacePath(
318+
CatalogUtils.stringToURI(location), warehousePath, session.sharedState.hadoopConf)
314319
AlterNamespaceSetPropertiesExec(
315320
catalog.asNamespaceCatalog,
316321
ns,
317-
Map(SupportsNamespaces.PROP_LOCATION -> location)) :: Nil
322+
Map(SupportsNamespaces.PROP_LOCATION -> CatalogUtils.URIToString(nsPath))) :: Nil
318323

319324
case CommentOnNamespace(ResolvedNamespace(catalog, ns), comment) =>
320325
AlterNamespaceSetPropertiesExec(

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1294,13 +1294,25 @@ class DataSourceV2SQLSuite
12941294
assert(descriptionDf.collect() === Seq(
12951295
Row("Namespace Name", "ns2"),
12961296
Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"),
1297-
Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test_2"),
1297+
Row(SupportsNamespaces.PROP_LOCATION.capitalize, "file:/tmp/ns_test_2"),
12981298
Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser),
12991299
Row("Properties", ""))
13001300
)
13011301
}
13021302
}
13031303

1304+
test("SPARK-37444: ALTER NAMESPACE .. SET LOCATION using v2 catalog with empty location") {
1305+
val ns = "testcat.ns1.ns2"
1306+
withNamespace(ns) {
1307+
sql(s"CREATE NAMESPACE IF NOT EXISTS $ns COMMENT " +
1308+
"'test namespace' LOCATION '/tmp/ns_test_1'")
1309+
val e = intercept[IllegalArgumentException] {
1310+
sql(s"ALTER DATABASE $ns SET LOCATION ''")
1311+
}
1312+
assert(e.getMessage.contains("Can not create a Path from an empty string"))
1313+
}
1314+
}
1315+
13041316
private def testShowNamespaces(
13051317
sqlText: String,
13061318
expected: Seq[String]): Unit = {

0 commit comments

Comments
 (0)