diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 70e5108d938b..7614a270117a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -22,6 +22,8 @@ import java.util.regex.Pattern import scala.collection.mutable import scala.util.control.NonFatal +import org.apache.hadoop.fs.Path + import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier @@ -354,7 +356,27 @@ object CreateDataSourceTableUtils extends Logging { tableType = tableType, schema = Nil, storage = CatalogStorageFormat( - locationUri = None, + // We don't want Hive metastore to implicitly create a table directory, + // which may not be the one the Data Source table is referring to, + // yet which will be left behind when the table is dropped for an external table + locationUri = if (new CaseInsensitiveMap(options).get("path").isDefined) { + val path = new Path(new CaseInsensitiveMap(options).get("path").get) + val fs = path.getFileSystem(sparkSession.sessionState.newHadoopConf()) + if (fs.exists(path)) { + // if the provided path exists, Hive metastore only takes a directory + // as table data location + if (fs.getFileStatus(path).isDirectory) { + Some(path.toUri.toString) + } else { + Some(path.getParent.toUri.toString) + } + } else { + // If the path does not exist yet, it is assumed to be a directory + Some(path.toUri.toString) + } + } else { + None + }, inputFormat = None, outputFormat = None, serde = None, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index b507018e58d1..a6c387383c5b 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1104,4 +1104,24 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } } } + + test("SPARK-15269: non-hive compatible table") { + withTempPath { dir => + val path = dir.getCanonicalPath + val df = spark.range(1).toDF() + + val hadoopPath = new Path(path, "data") + val fs = hadoopPath.getFileSystem(spark.sessionState.newHadoopConf()) + val qualified = hadoopPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + fs.delete(qualified, true) + df.write.mode(SaveMode.Overwrite).json(qualified.toUri.toString) + + withTable("ddl_test1") { + sql(s"CREATE TABLE ddl_test1 USING json OPTIONS (PATH '${qualified.toUri.toString}')") + sql("DROP TABLE ddl_test1") + sql(s"CREATE TABLE ddl_test1 USING json AS SELECT 10 AS a") + checkAnswer(sql("select * from ddl_test1"), Seq(Row(10))) + } + } + } }