From 1eb40e0e5916e69df200f9a817b61ffa93bd35da Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 7 Aug 2016 21:15:46 -0700 Subject: [PATCH 01/17] fix --- .../spark/sql/execution/command/tables.scala | 39 +++++- .../sql/hive/execution/HiveDDLSuite.scala | 112 ++++++++++++++++++ 2 files changed, 146 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index e6fe9a73a1f3..c79cf7b98942 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -80,11 +80,40 @@ case class CreateTableLikeCommand( s"Source table in CREATE TABLE LIKE cannot be temporary: '$sourceTable'") } - val tableToCreate = catalog.getTableMetadata(sourceTable).copy( - identifier = targetTable, - tableType = CatalogTableType.MANAGED, - createTime = System.currentTimeMillis, - lastAccessTime = -1).withNewStorage(locationUri = None) + val sourceTableDesc = catalog.getTableMetadata(sourceTable) + + val tableToCreate = { + // For EXTERNAL_TABLE, the table properties has a particular field. To change it + // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE, + // even if we set the tableType to MANAGED + // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) + val tableProp = sourceTableDesc.properties.filterKeys(_ != "EXTERNAL") + + if (DDLUtils.isDatasourceTable(sourceTableDesc)) { + if (sourceTableDesc.tableType != CatalogTableType.MANAGED) { + throw new AnalysisException( + "CREATE TABLE LIKE is not allowed when the source table is external tables created " + + "using the datasource API") + } else { + val newPath = catalog.defaultTablePath(targetTable) + sourceTableDesc.copy( + identifier = targetTable, + tableType = CatalogTableType.MANAGED, + createTime = System.currentTimeMillis, + properties = tableProp, + lastAccessTime = -1).withNewStorage( + locationUri = None, + serdeProperties = sourceTableDesc.storage.properties ++ Map("path" -> newPath)) + } + } else { + sourceTableDesc.copy( + identifier = targetTable, + tableType = CatalogTableType.MANAGED, + createTime = System.currentTimeMillis, + properties = tableProp, + lastAccessTime = -1).withNewStorage(locationUri = None) + } + } catalog.createTable(tableToCreate, ifNotExists) Seq.empty[Row] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index e078b585420f..950db2ef3af9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.internal.config._ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTableType} import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -617,6 +618,117 @@ class HiveDDLSuite } } + test("CREATE TABLE LIKE a data source table") { + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + spark.range(10).select('id as 'a, 
'id as 'b, 'id as 'c, 'id as 'd) + .write.format("json").saveAsTable(sourceTabName) + + val sourceTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + val sourceTablePath = sourceTable.storage.properties.get("path") + + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val targetTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + val targetTablePath = targetTable.storage.properties.get("path") + + // The table path should be different + assert(sourceTablePath != targetTablePath) + // The source table contents should not been seen in the target table. + // The target table should be empty + assert(spark.table(sourceTabName).count() != 0) + assert(spark.table(targetTabName).count() == 0) + + // The table type of both source and target table should be the Hive-managed data source table + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + assert(DDLUtils.isDatasourceTable(targetTable)) + assert(targetTable.tableType == CatalogTableType.MANAGED) + + // Their schema should be identical + checkAnswer( + sql(s"DESC $sourceTabName"), + sql(s"DESC $targetTabName")) + } + } + + test("CREATE TABLE LIKE an external data source table") { + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("parquet").save(path) + sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '$path')") + + // The source table should be an external data source table + val sourceTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) + + val e = intercept[AnalysisException] { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + }.getMessage + assert(e.contains("CREATE TABLE LIKE is not allowed when the source table is " + + "external tables created using the datasource API")) + } + } + } + + test("CREATE TABLE LIKE an external Hive serde table") { + val catalog = spark.sessionState.catalog + withTempDir { tmpDir => + val basePath = tmpDir.getCanonicalPath + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + assert(tmpDir.listFiles.isEmpty) + sql( + s""" + |CREATE EXTERNAL TABLE $sourceTabName (key INT, value STRING) + |PARTITIONED BY (ds STRING, hr STRING) + |LOCATION '$basePath' + """.stripMargin) + + for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { + sql( + s""" + |INSERT OVERWRITE TABLE $sourceTabName + |partition (ds='$ds',hr='$hr') + |SELECT 1, 'a' + """.stripMargin) + } + + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + val sourceTablePath = sourceTable.storage.locationUri + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) + + val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + val targetTablePath = targetTable.storage.locationUri + assert(targetTable.tableType == CatalogTableType.MANAGED) + + // The table path should be different + assert(sourceTablePath != targetTablePath) + // The source table contents should not been seen in the target table. 
+ // The target table should be empty + assert(spark.table(sourceTabName).count() != 0) + assert(spark.table(targetTabName).count() == 0) + + // Their schema should be identical + checkAnswer( + sql(s"DESC $sourceTabName"), + sql(s"DESC $targetTabName")) + } + } + } + test("desc table for data source table") { withTable("tab1") { val tabName = "tab1" From d0e9217888f3ed37e5e29a6a790371fcc667f433 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 8 Aug 2016 11:41:01 -0700 Subject: [PATCH 02/17] fix the issue in VIEW --- .../spark/sql/execution/command/tables.scala | 8 +++- .../sql/hive/execution/HiveDDLSuite.scala | 48 +++++++++++++++++-- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c79cf7b98942..a4f78679f00b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -101,7 +101,9 @@ case class CreateTableLikeCommand( tableType = CatalogTableType.MANAGED, createTime = System.currentTimeMillis, properties = tableProp, - lastAccessTime = -1).withNewStorage( + lastAccessTime = -1, + viewOriginalText = None, + viewText = None).withNewStorage( locationUri = None, serdeProperties = sourceTableDesc.storage.properties ++ Map("path" -> newPath)) } @@ -111,7 +113,9 @@ case class CreateTableLikeCommand( tableType = CatalogTableType.MANAGED, createTime = System.currentTimeMillis, properties = tableProp, - lastAccessTime = -1).withNewStorage(locationUri = None) + lastAccessTime = -1, + viewOriginalText = None, + viewText = None).withNewStorage(locationUri = None) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 950db2ef3af9..3a18d7efa206 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -624,13 +624,11 @@ class HiveDDLSuite withTable(sourceTabName, targetTabName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("json").saveAsTable(sourceTabName) + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") val sourceTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) val sourceTablePath = sourceTable.storage.properties.get("path") - - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - val targetTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) val targetTablePath = targetTable.storage.properties.get("path") @@ -694,7 +692,6 @@ class HiveDDLSuite |PARTITIONED BY (ds STRING, hr STRING) |LOCATION '$basePath' """.stripMargin) - for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { sql( s""" @@ -703,7 +700,6 @@ class HiveDDLSuite |SELECT 1, 'a' """.stripMargin) } - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) @@ -729,6 +725,48 @@ class HiveDDLSuite } } + test("CREATE TABLE LIKE a view") { + val sourceTabName = "tab1" + val sourceViewName = "view" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + withView(sourceViewName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 
'd) + .write.format("json").saveAsTable(sourceTabName) + sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") + + val sourceView = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceViewName, Some("default"))) + val sourceViewPath = sourceView.storage.locationUri + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + val targetTablePath = targetTable.storage.locationUri + + // The source table contents should not been seen in the target table. + // The target table should be empty + assert(spark.table(sourceTabName).count() != 0) + assert(spark.table(targetTabName).count() == 0) + + // The original source should be a VIEW with an empty path + assert(sourceView.tableType == CatalogTableType.VIEW) + assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) + assert(sourceViewPath.isEmpty) + + // The original source should be a MANAGED table with empty view text and original text + // The location of table should not be empty. + assert(targetTable.tableType == CatalogTableType.MANAGED) + assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty) + assert(targetTablePath.nonEmpty) + + // Their schema should be identical + checkAnswer( + sql(s"DESC $sourceViewName"), + sql(s"DESC $targetTabName")) + } + } + } + test("desc table for data source table") { withTable("tab1") { val tabName = "tab1" From 29e17a26f7a4a6f39b4f13187562c5e21899b271 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 8 Aug 2016 15:04:41 -0700 Subject: [PATCH 03/17] fix the issue in comment and index table. --- .../spark/sql/execution/command/tables.scala | 12 +- .../sql/hive/execution/HiveDDLSuite.scala | 129 ++++++++++-------- 2 files changed, 83 insertions(+), 58 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a4f78679f00b..4ba1ef8a0d8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -82,12 +82,20 @@ case class CreateTableLikeCommand( val sourceTableDesc = catalog.getTableMetadata(sourceTable) + sourceTableDesc.tableType match { + case CatalogTableType.MANAGED | CatalogTableType.EXTERNAL | CatalogTableType.VIEW => // OK + case o => throw new AnalysisException( + s"CREATE TABLE LIKE is not allowed when the source table is ${o.name}") + } + val tableToCreate = { // For EXTERNAL_TABLE, the table properties has a particular field. To change it // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE, // even if we set the tableType to MANAGED // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) - val tableProp = sourceTableDesc.properties.filterKeys(_ != "EXTERNAL") + // Table comment is stored as a table property. To clean it, we also should remove them. 
+ val tableProp = + sourceTableDesc.properties.filterKeys(key => key != "EXTERNAL" && key != "comment") if (DDLUtils.isDatasourceTable(sourceTableDesc)) { if (sourceTableDesc.tableType != CatalogTableType.MANAGED) { @@ -102,6 +110,7 @@ case class CreateTableLikeCommand( createTime = System.currentTimeMillis, properties = tableProp, lastAccessTime = -1, + comment = None, viewOriginalText = None, viewText = None).withNewStorage( locationUri = None, @@ -114,6 +123,7 @@ case class CreateTableLikeCommand( createTime = System.currentTimeMillis, properties = tableProp, lastAccessTime = -1, + comment = None, viewOriginalText = None, viewText = None).withNewStorage(locationUri = None) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 3a18d7efa206..2b454f15efe0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.internal.config._ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} -import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -628,28 +628,13 @@ class HiveDDLSuite val sourceTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) - val sourceTablePath = sourceTable.storage.properties.get("path") val targetTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - val targetTablePath = targetTable.storage.properties.get("path") - - // The table path should be different - assert(sourceTablePath != targetTablePath) - // The source table contents should not been seen in the target table. 
- // The target table should be empty - assert(spark.table(sourceTabName).count() != 0) - assert(spark.table(targetTabName).count() == 0) - - // The table type of both source and target table should be the Hive-managed data source table + // The table type of the source table should be a Hive-managed data source table assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.MANAGED) - assert(DDLUtils.isDatasourceTable(targetTable)) - assert(targetTable.tableType == CatalogTableType.MANAGED) - // Their schema should be identical - checkAnswer( - sql(s"DESC $sourceTabName"), - sql(s"DESC $targetTabName")) + checkCreateTableLike(sourceTable, targetTable) } } @@ -678,6 +663,22 @@ class HiveDDLSuite } } + test("CREATE TABLE LIKE a managed Hive serde table") { + val catalog = spark.sessionState.catalog + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + sql(s"CREATE TABLE $sourceTabName AS SELECT 1 key, 'a' value") + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable) + } + } + test("CREATE TABLE LIKE an external Hive serde table") { val catalog = spark.sessionState.catalog withTempDir { tmpDir => @@ -688,7 +689,8 @@ class HiveDDLSuite assert(tmpDir.listFiles.isEmpty) sql( s""" - |CREATE EXTERNAL TABLE $sourceTabName (key INT, value STRING) + |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING) + |COMMENT 'Apache Spark' |PARTITIONED BY (ds STRING, hr STRING) |LOCATION '$basePath' """.stripMargin) @@ -703,24 +705,11 @@ class HiveDDLSuite sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) - val sourceTablePath = sourceTable.storage.locationUri assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - + assert(sourceTable.properties.get("comment") == Option("Apache Spark")) val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - val targetTablePath = targetTable.storage.locationUri - assert(targetTable.tableType == CatalogTableType.MANAGED) - - // The table path should be different - assert(sourceTablePath != targetTablePath) - // The source table contents should not been seen in the target table. - // The target table should be empty - assert(spark.table(sourceTabName).count() != 0) - assert(spark.table(targetTabName).count() == 0) - - // Their schema should be identical - checkAnswer( - sql(s"DESC $sourceTabName"), - sql(s"DESC $targetTabName")) + + checkCreateTableLike(sourceTable, targetTable) } } } @@ -738,35 +727,61 @@ class HiveDDLSuite val sourceView = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceViewName, Some("default"))) - val sourceViewPath = sourceView.storage.locationUri - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - val targetTablePath = targetTable.storage.locationUri - - // The source table contents should not been seen in the target table. 
- // The target table should be empty
- assert(spark.table(sourceTabName).count() != 0)
- assert(spark.table(targetTabName).count() == 0)
-
 // The original source should be a VIEW with an empty path
 assert(sourceView.tableType == CatalogTableType.VIEW)
 assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
- assert(sourceViewPath.isEmpty)
-
- // The original source should be a MANAGED table with empty view text and original text
- // The location of table should not be empty.
- assert(targetTable.tableType == CatalogTableType.MANAGED)
- assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty)
- assert(targetTablePath.nonEmpty)
-
- // Their schema should be identical
- checkAnswer(
- sql(s"DESC $sourceViewName"),
- sql(s"DESC $targetTabName"))
+ val targetTable = spark.sessionState.catalog.getTableMetadata(
+ TableIdentifier(targetTabName, Some("default")))
+
+ checkCreateTableLike(sourceView, targetTable)
 }
 }
 }

+ private def getTablePath(table: CatalogTable): Option[String] = {
+ if (DDLUtils.isDatasourceTable(table)) {
+ table.storage.properties.get("path")
+ } else {
+ table.storage.locationUri
+ }
+ }
+
+ private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
+ // The original source should be a MANAGED table with empty view text and original text
+ // The location of table should not be empty.
+ assert(targetTable.tableType == CatalogTableType.MANAGED,
+ "the created table must be a Hive managed table")
+ assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
+ "the view text and original text in the created table must be empty")
+ assert(targetTable.comment.isEmpty,
+ "the comment in the created table must be empty")
+ assert(targetTable.properties.get("comment").isEmpty,
+ "the comment in the created table must be empty")
+
+ if (DDLUtils.isDatasourceTable(sourceTable)) {
+ assert(DDLUtils.isDatasourceTable(targetTable),
+ "the target table should be a data source table")
+ } else {
+ assert(!DDLUtils.isDatasourceTable(targetTable),
+ "the target table should be a Hive serde table")
+ }
+
+ val sourceTablePath = getTablePath(sourceTable)
+ val targetTablePath = getTablePath(targetTable)
+ assert(targetTablePath.nonEmpty, "target table path should not be empty")
+ assert(sourceTablePath != targetTablePath,
+ "source table/view path should be different from target table path")
+
+ // The source table contents should not be seen in the target table.
+ assert(spark.table(sourceTable.identifier).count() != 0, "the source table should be nonempty") + assert(spark.table(targetTable.identifier).count() == 0, "the target table should be empty") + + // Their schema should be identical + checkAnswer( + sql(s"DESC ${sourceTable.identifier}"), + sql(s"DESC ${targetTable.identifier}")) + } + test("desc table for data source table") { withTable("tab1") { val tabName = "tab1" From 9d70eb5ca8ad42cc8b7a05ee43519a52bff604b9 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 8 Aug 2016 17:07:14 -0700 Subject: [PATCH 04/17] code clean --- .../spark/sql/execution/command/tables.scala | 70 +++++++++---------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 4ba1ef8a0d8f..819d811ae92c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -88,48 +88,42 @@ case class CreateTableLikeCommand( s"CREATE TABLE LIKE is not allowed when the source table is ${o.name}") } - val tableToCreate = { - // For EXTERNAL_TABLE, the table properties has a particular field. To change it - // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE, - // even if we set the tableType to MANAGED - // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) - // Table comment is stored as a table property. To clean it, we also should remove them. - val tableProp = - sourceTableDesc.properties.filterKeys(key => key != "EXTERNAL" && key != "comment") + if (DDLUtils.isDatasourceTable(sourceTableDesc) && + sourceTableDesc.tableType != CatalogTableType.MANAGED) { + throw new AnalysisException( + "CREATE TABLE LIKE is not allowed when the source table is external tables created " + + "using the datasource API") + } + // For EXTERNAL_TABLE, the table properties has a particular field. To change it + // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE, + // even if we set the tableType to MANAGED + // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) + // Table comment is stored as a table property. To clean it, we also should remove them. 
+ val newTableProp = + sourceTableDesc.properties.filterKeys(key => key != "EXTERNAL" && key != "comment") + + val newSerdeProp = if (DDLUtils.isDatasourceTable(sourceTableDesc)) { - if (sourceTableDesc.tableType != CatalogTableType.MANAGED) { - throw new AnalysisException( - "CREATE TABLE LIKE is not allowed when the source table is external tables created " + - "using the datasource API") - } else { - val newPath = catalog.defaultTablePath(targetTable) - sourceTableDesc.copy( - identifier = targetTable, - tableType = CatalogTableType.MANAGED, - createTime = System.currentTimeMillis, - properties = tableProp, - lastAccessTime = -1, - comment = None, - viewOriginalText = None, - viewText = None).withNewStorage( - locationUri = None, - serdeProperties = sourceTableDesc.storage.properties ++ Map("path" -> newPath)) - } + val newPath = catalog.defaultTablePath(targetTable) + sourceTableDesc.storage.properties ++ Map("path" -> newPath) } else { - sourceTableDesc.copy( - identifier = targetTable, - tableType = CatalogTableType.MANAGED, - createTime = System.currentTimeMillis, - properties = tableProp, - lastAccessTime = -1, - comment = None, - viewOriginalText = None, - viewText = None).withNewStorage(locationUri = None) + sourceTableDesc.storage.properties } - } - - catalog.createTable(tableToCreate, ifNotExists) + val newTableDesc = + sourceTableDesc.copy( + identifier = targetTable, + tableType = CatalogTableType.MANAGED, + createTime = System.currentTimeMillis, + properties = newTableProp, + lastAccessTime = -1, + comment = None, + viewOriginalText = None, + viewText = None).withNewStorage( + locationUri = None, + serdeProperties = newSerdeProp) + + catalog.createTable(newTableDesc, ifNotExists) Seq.empty[Row] } } From 84347777d8684526bb35fd8af62f13bb4aa06ba5 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 8 Aug 2016 17:08:07 -0700 Subject: [PATCH 05/17] code clean --- .../scala/org/apache/spark/sql/execution/command/tables.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 819d811ae92c..913fc665d249 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -102,7 +102,6 @@ case class CreateTableLikeCommand( // Table comment is stored as a table property. To clean it, we also should remove them. 
val newTableProp = sourceTableDesc.properties.filterKeys(key => key != "EXTERNAL" && key != "comment") - val newSerdeProp = if (DDLUtils.isDatasourceTable(sourceTableDesc)) { val newPath = catalog.defaultTablePath(targetTable) From b820be831ecdecb3261bf9eb1171ac8545748aa3 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 8 Aug 2016 21:36:55 -0700 Subject: [PATCH 06/17] add comments and improve the test cases --- .../apache/spark/sql/execution/command/tables.scala | 9 ++++++--- .../spark/sql/hive/execution/HiveDDLSuite.scala | 12 +++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 913fc665d249..44486ac923ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -56,7 +56,10 @@ case class CreateHiveTableAsSelectLogicalPlan( } /** - * A command to create a table with the same definition of the given existing table. + * A command to create a MANAGED table with the same definition of the given existing table. + * The source table cannot be temporary table, Index table or an external table created using + * the datasource API. In the target table definition, the table comment is always empty but + * the column comments are identical to the ones defined in the source table. * * The syntax of using this command in SQL is: * {{{ @@ -91,7 +94,7 @@ case class CreateTableLikeCommand( if (DDLUtils.isDatasourceTable(sourceTableDesc) && sourceTableDesc.tableType != CatalogTableType.MANAGED) { throw new AnalysisException( - "CREATE TABLE LIKE is not allowed when the source table is external tables created " + + "CREATE TABLE LIKE is not allowed when the source table is an external table created " + "using the datasource API") } @@ -604,7 +607,7 @@ case class ShowTablesCommand( /** - * A command for users to list the properties for a table If propertyKey is specified, the value + * A command for users to list the properties for a table. If propertyKey is specified, the value * for the propertyKey is returned. If propertyKey is not specified, all the keys and their * corresponding values are returned. 
* The syntax of using this command in SQL is: diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 2b454f15efe0..cfc4702e5727 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -658,7 +658,7 @@ class HiveDDLSuite sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") }.getMessage assert(e.contains("CREATE TABLE LIKE is not allowed when the source table is " + - "external tables created using the datasource API")) + "an external table created using the datasource API")) } } } @@ -780,6 +780,16 @@ class HiveDDLSuite checkAnswer( sql(s"DESC ${sourceTable.identifier}"), sql(s"DESC ${targetTable.identifier}")) + + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + // Check whether the new table can be inserted using the data from the original table + sql(s"INSERT INTO TABLE ${targetTable.identifier} SELECT * FROM ${sourceTable.identifier}") + } + + // After insertion, the data should be identical + checkAnswer( + sql(s"SELECT * FROM ${sourceTable.identifier}"), + sql(s"SELECT * FROM ${targetTable.identifier}")) } test("desc table for data source table") { From 45b51d1061cbd3f02d9f332875458aa359acff84 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 9 Aug 2016 12:14:16 -0700 Subject: [PATCH 07/17] address comments --- .../spark/sql/execution/command/tables.scala | 26 +++++++++---------- .../sql/hive/execution/HiveDDLSuite.scala | 13 +++++----- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 44486ac923ae..904db6a42e8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -57,9 +57,13 @@ case class CreateHiveTableAsSelectLogicalPlan( /** * A command to create a MANAGED table with the same definition of the given existing table. - * The source table cannot be temporary table, Index table or an external table created using - * the datasource API. In the target table definition, the table comment is always empty but - * the column comments are identical to the ones defined in the source table. + * The source table cannot be temporary table or Index table. In the target table definition, + * the table comment is always empty but the column comments are identical to the ones defined + * in the source table. + * + * The CatalogTable attributes copied from the source table include storage(inputFormat, outputFormat, + * serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec, properties, + * unsupportedFeatures. * * The syntax of using this command in SQL is: * {{{ @@ -91,24 +95,19 @@ case class CreateTableLikeCommand( s"CREATE TABLE LIKE is not allowed when the source table is ${o.name}") } - if (DDLUtils.isDatasourceTable(sourceTableDesc) && - sourceTableDesc.tableType != CatalogTableType.MANAGED) { - throw new AnalysisException( - "CREATE TABLE LIKE is not allowed when the source table is an external table created " + - "using the datasource API") - } - // For EXTERNAL_TABLE, the table properties has a particular field. 
To change it // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE, // even if we set the tableType to MANAGED // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) // Table comment is stored as a table property. To clean it, we also should remove them. val newTableProp = - sourceTableDesc.properties.filterKeys(key => key != "EXTERNAL" && key != "comment") + sourceTableDesc.properties.filterKeys(key => + key != "EXTERNAL" && key.toLowerCase != "comment") val newSerdeProp = if (DDLUtils.isDatasourceTable(sourceTableDesc)) { val newPath = catalog.defaultTablePath(targetTable) - sourceTableDesc.storage.properties ++ Map("path" -> newPath) + sourceTableDesc.storage.properties.filterKeys(_.toLowerCase != "path") ++ + Map("path" -> newPath) } else { sourceTableDesc.storage.properties } @@ -116,9 +115,10 @@ case class CreateTableLikeCommand( sourceTableDesc.copy( identifier = targetTable, tableType = CatalogTableType.MANAGED, + owner = "", createTime = System.currentTimeMillis, - properties = newTableProp, lastAccessTime = -1, + properties = newTableProp, comment = None, viewOriginalText = None, viewText = None).withNewStorage( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index cfc4702e5727..c1aaf79279d0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -647,18 +648,18 @@ class HiveDDLSuite spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("parquet").save(path) sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '$path')") + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") // The source table should be an external data source table val sourceTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + // The table type of the source table should be an external data source table assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - val e = intercept[AnalysisException] { - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - }.getMessage - assert(e.contains("CREATE TABLE LIKE is not allowed when the source table is " + - "an external table created using the datasource API")) + checkCreateTableLike(sourceTable, targetTable) } } } @@ -740,7 +741,7 @@ class HiveDDLSuite private def getTablePath(table: CatalogTable): Option[String] = { if (DDLUtils.isDatasourceTable(table)) { - table.storage.properties.get("path") + new CaseInsensitiveMap(table.storage.properties).get("path") } else { table.storage.locationUri } From 6180e80c2bc69d23441b752f78f1ca94071dd0ba Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 9 Aug 2016 
13:11:20 -0700 Subject: [PATCH 08/17] fixed the style. --- .../org/apache/spark/sql/execution/command/tables.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 904db6a42e8a..c8032b3b30a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -61,9 +61,9 @@ case class CreateHiveTableAsSelectLogicalPlan( * the table comment is always empty but the column comments are identical to the ones defined * in the source table. * - * The CatalogTable attributes copied from the source table include storage(inputFormat, outputFormat, - * serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec, properties, - * unsupportedFeatures. + * The CatalogTable attributes copied from the source table include storage(inputFormat, + * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, + * bucketSpec, properties, unsupportedFeatures. * * The syntax of using this command in SQL is: * {{{ From 111bdde80d35b206e9908f1b8f05da889ea9e2cb Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 12 Aug 2016 20:57:46 -0700 Subject: [PATCH 09/17] address comments --- .../spark/sql/execution/command/tables.scala | 43 ++++++++++--------- .../sql/hive/execution/HiveDDLSuite.scala | 23 +++++++++- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 6201aa9d1d0a..ff84a8c445dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} @@ -57,11 +57,10 @@ case class CreateHiveTableAsSelectLogicalPlan( /** * A command to create a MANAGED table with the same definition of the given existing table. - * The source table cannot be temporary table or Index table. In the target table definition, - * the table comment is always empty but the column comments are identical to the ones defined - * in the source table. + * The source table cannot be an Index table. In the target table definition, the table comment is + * always empty but the column comments are identical to the ones defined in the source table. * - * The CatalogTable attributes copied from the source table include storage(inputFormat, + * The CatalogTable attributes copied from the source table are storage(inputFormat, * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, * bucketSpec, properties, unsupportedFeatures. 
* @@ -82,12 +81,9 @@ case class CreateTableLikeCommand( throw new AnalysisException( s"Source table in CREATE TABLE LIKE does not exist: '$sourceTable'") } - if (catalog.isTemporaryTable(sourceTable)) { - throw new AnalysisException( - s"Source table in CREATE TABLE LIKE cannot be temporary: '$sourceTable'") - } val sourceTableDesc = catalog.getTableMetadata(sourceTable) + val sourceStorageFormat = sourceTableDesc.storage sourceTableDesc.tableType match { case CatalogTableType.MANAGED | CatalogTableType.EXTERNAL | CatalogTableType.VIEW => // OK @@ -100,9 +96,7 @@ case class CreateTableLikeCommand( // even if we set the tableType to MANAGED // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105) // Table comment is stored as a table property. To clean it, we also should remove them. - val newTableProp = - sourceTableDesc.properties.filterKeys(key => - key != "EXTERNAL" && key.toLowerCase != "comment") + val newTableProp = sourceTableDesc.properties.filterKeys(_ != "EXTERNAL") val newSerdeProp = if (DDLUtils.isDatasourceTable(sourceTableDesc)) { val newPath = catalog.defaultTablePath(targetTable) @@ -111,19 +105,26 @@ case class CreateTableLikeCommand( } else { sourceTableDesc.storage.properties } + val newStorage = + CatalogStorageFormat( + locationUri = None, + inputFormat = sourceStorageFormat.inputFormat, + outputFormat = sourceStorageFormat.outputFormat, + serde = sourceStorageFormat.serde, + compressed = sourceStorageFormat.compressed, + properties = newSerdeProp) + val newTableDesc = - sourceTableDesc.copy( + CatalogTable( identifier = targetTable, tableType = CatalogTableType.MANAGED, - owner = "", - createTime = System.currentTimeMillis, - lastAccessTime = -1, + storage = newStorage, + schema = sourceTableDesc.schema, + provider = sourceTableDesc.provider, + partitionColumnNames = sourceTableDesc.partitionColumnNames, + bucketSpec = sourceTableDesc.bucketSpec, properties = newTableProp, - comment = None, - viewOriginalText = None, - viewText = None).withNewStorage( - locationUri = None, - serdeProperties = newSerdeProp) + unsupportedFeatures = sourceTableDesc.unsupportedFeatures) catalog.createTable(newTableDesc, ifNotExists) Seq.empty[Row] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 7c5ba19e7220..d59d27aa2bd6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.execution import java.io.File import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.scalatest.BeforeAndAfterEach import org.apache.spark.internal.config._ @@ -622,6 +623,26 @@ class HiveDDLSuite } } + test("CREATE TABLE LIKE a temporary table") { + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .createTempView(sourceTabName) + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val sourceTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, None)) + val targetTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + + assert(targetTable.storage.serde == + Option(classOf[LazySimpleSerDe].getCanonicalName)) + + 
checkCreateTableLike(sourceTable, targetTable)
+ }
+ }
+
 test("CREATE TABLE LIKE a data source table") {
 val sourceTabName = "tab1"
 val targetTabName = "tab2"
 withTable(sourceTabName, targetTabName) {
@@ -710,7 +731,7 @@ class HiveDDLSuite
 val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
 assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
- assert(sourceTable.properties.get("comment") == Option("Apache Spark"))
+ assert(sourceTable.comment == Option("Apache Spark"))
 val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

 checkCreateTableLike(sourceTable, targetTable)

From 63c4bc598987272460dfca9d324bb26af5451280 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 22 Aug 2016 23:10:17 -0700
Subject: [PATCH 10/17] address comments.

---
 .../org/apache/spark/sql/execution/command/tables.scala | 8 +-------
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 4 +++-
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index f6cc1f452e67..5d4f01e46fc9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -91,12 +91,6 @@ case class CreateTableLikeCommand(
 s"CREATE TABLE LIKE is not allowed when the source table is ${o.name}")
 }

- // For EXTERNAL_TABLE, the table properties has a particular field. To change it
- // to a MANAGED_TABLE, we need to remove it; Otherwise, it will be EXTERNAL_TABLE,
- // even if we set the tableType to MANAGED
- // (metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105)
- // Table comment is stored as a table property. To clean it, we also should remove them.
- val newTableProp = sourceTableDesc.properties.filterKeys(_ != "EXTERNAL")
 val newSerdeProp =
 if (DDLUtils.isDatasourceTable(sourceTableDesc)) {
 val newPath = catalog.defaultTablePath(targetTable)
@@ -123,7 +117,7 @@ case class CreateTableLikeCommand(
 provider = sourceTableDesc.provider,
 partitionColumnNames = sourceTableDesc.partitionColumnNames,
 bucketSpec = sourceTableDesc.bucketSpec,
- properties = newTableProp,
+ properties = sourceTableDesc.properties,
 unsupportedFeatures = sourceTableDesc.unsupportedFeatures)

 catalog.createTable(newTableDesc, ifNotExists)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 9b7afd462841..2747a9a464ef 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -399,7 +399,9 @@ private[hive] class HiveClientImpl(
 properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters)
 .map(_.asScala.toMap).orNull
 ),
- properties = properties.filter(kv => kv._1 != "comment"),
+ // For EXTERNAL_TABLE, the table properties contain a special field, "EXTERNAL". It is added
+ // by the function toHiveTable, so it is filtered out here when the table is read back.
+ properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"), comment = properties.get("comment"), viewOriginalText = Option(h.getViewOriginalText), viewText = Option(h.getViewExpandedText), From 1844c282336f2ce450268b2659d942e493c9c911 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 23 Aug 2016 00:39:18 -0700 Subject: [PATCH 11/17] address comments --- .../spark/sql/execution/command/tables.scala | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 5d4f01e46fc9..1358cc3a355d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -57,8 +57,8 @@ case class CreateHiveTableAsSelectLogicalPlan( /** * A command to create a MANAGED table with the same definition of the given existing table. - * The source table cannot be an Index table. In the target table definition, the table comment is - * always empty but the column comments are identical to the ones defined in the source table. + * In the target table definition, the table comment is always empty but the column comments + * are identical to the ones defined in the source table. * * The CatalogTable attributes copied from the source table are storage(inputFormat, * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, @@ -83,7 +83,6 @@ case class CreateTableLikeCommand( } val sourceTableDesc = catalog.getTableMetadata(sourceTable) - val sourceStorageFormat = sourceTableDesc.storage sourceTableDesc.tableType match { case CatalogTableType.MANAGED | CatalogTableType.EXTERNAL | CatalogTableType.VIEW => // OK @@ -99,14 +98,9 @@ case class CreateTableLikeCommand( } else { sourceTableDesc.storage.properties } - val newStorage = - CatalogStorageFormat( - locationUri = None, - inputFormat = sourceStorageFormat.inputFormat, - outputFormat = sourceStorageFormat.outputFormat, - serde = sourceStorageFormat.serde, - compressed = sourceStorageFormat.compressed, - properties = newSerdeProp) + val newStorage = sourceTableDesc.storage.copy( + locationUri = None, + properties = newSerdeProp) val newTableDesc = CatalogTable( From f81a49b970de122c1c84fffa80bc75f894bf35e1 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 29 Aug 2016 11:37:54 -0700 Subject: [PATCH 12/17] do not copy source's table properties --- .../spark/sql/execution/command/tables.scala | 4 +-- .../sql/hive/execution/HiveDDLSuite.scala | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a3768ed9afca..609697f1c124 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.Utils * * The CatalogTable attributes copied from the source table are storage(inputFormat, * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, - * bucketSpec, properties, unsupportedFeatures. + * bucketSpec, unsupportedFeatures. 
 *
 * The syntax of using this command in SQL is:
 * {{{
@@ -94,7 +94,7 @@ case class CreateTableLikeCommand(
 provider = sourceTableDesc.provider,
 partitionColumnNames = sourceTableDesc.partitionColumnNames,
 bucketSpec = sourceTableDesc.bucketSpec,
- properties = sourceTableDesc.properties,
+ properties = Map.empty[String, String],
 unsupportedFeatures = sourceTableDesc.unsupportedFeatures)

 catalog.createTable(newTableDesc, ifNotExists)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 647a710c4570..23f18af295ec 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -693,11 +693,12 @@ class HiveDDLSuite
 val sourceTabName = "tab1"
 val targetTabName = "tab2"
 withTable(sourceTabName, targetTabName) {
- sql(s"CREATE TABLE $sourceTabName AS SELECT 1 key, 'a' value")
+ sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'")
 sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")

 val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
 assert(sourceTable.tableType == CatalogTableType.MANAGED)
+ assert(sourceTable.properties.get("prop1").nonEmpty)
 val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))

 checkCreateTableLike(sourceTable, targetTable)
@@ -780,8 +781,26 @@ class HiveDDLSuite
 "the view text and original text in the created table must be empty")
 assert(targetTable.comment.isEmpty,
 "the comment in the created table must be empty")
- assert(targetTable.properties.get("comment").isEmpty,
- "the comment in the created table must be empty")
+
+ val metastoreGeneratedProperties = Seq(
+ "CreateTime",
+ "transient_lastDdlTime",
+ "grantTime",
+ "lastUpdateTime",
+ "last_modified_by",
+ "last_modified_time",
+ "Owner:",
+ "COLUMN_STATS_ACCURATE",
+ "numFiles",
+ "numRows",
+ "rawDataSize",
+ "totalSize",
+ "totalNumberFiles",
+ "maxFileSize",
+ "minFileSize"
+ )
+ assert(targetTable.properties.filterKeys(!metastoreGeneratedProperties.contains(_)).isEmpty,
+ "the table properties of the source table should not be copied to the created table")

 if (DDLUtils.isDatasourceTable(sourceTable)) {
 assert(DDLUtils.isDatasourceTable(targetTable),

From cf747f80d0756591e1ed801b89a6ef18257ed1df Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 29 Aug 2016 20:02:53 -0700
Subject: [PATCH 13/17] address comments.
--- .../spark/sql/execution/command/tables.scala | 7 ---- .../sql/hive/execution/HiveDDLSuite.scala | 39 ++++++++++++------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 609697f1c124..1206034a0aad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -67,12 +67,6 @@ case class CreateTableLikeCommand( val sourceTableDesc = catalog.getTableMetadata(sourceTable) - sourceTableDesc.tableType match { - case CatalogTableType.MANAGED | CatalogTableType.EXTERNAL | CatalogTableType.VIEW => // OK - case o => throw new AnalysisException( - s"CREATE TABLE LIKE is not allowed when the source table is ${o.name}") - } - val newSerdeProp = if (DDLUtils.isDatasourceTable(sourceTableDesc)) { val newPath = catalog.defaultTablePath(targetTable) @@ -94,7 +88,6 @@ case class CreateTableLikeCommand( provider = sourceTableDesc.provider, partitionColumnNames = sourceTableDesc.partitionColumnNames, bucketSpec = sourceTableDesc.bucketSpec, - properties = Map.empty[String, String], unsupportedFeatures = sourceTableDesc.unsupportedFeatures) catalog.createTable(newTableDesc, ifNotExists) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 23f18af295ec..01e20a528f95 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -623,23 +623,26 @@ class HiveDDLSuite } } - test("CREATE TABLE LIKE a temporary table") { - val sourceTabName = "tab1" + test("CREATE TABLE LIKE a temporary view") { + val sourceViewName = "tab1" val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .createTempView(sourceTabName) - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + withTempView(sourceViewName) { + withTable(targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .createTempView(sourceViewName) + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") - val sourceTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, None)) - val targetTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + val sourceTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceViewName, None)) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) - assert(targetTable.storage.serde == - Option(classOf[LazySimpleSerDe].getCanonicalName)) + // Source table is a temporary view, which does not have a serde + // We always pick the default serde, which is LazySimpleSerDe + assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName)) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable) + } } } @@ -758,6 +761,9 @@ class HiveDDLSuite assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) + // Source table is a view, which does not have a serde + 
// We always pick the default serde, which is LazySimpleSerDe
+ assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName))

 checkCreateTableLike(sourceView, targetTable)
 }
 }
 }
@@ -773,12 +779,15 @@ class HiveDDLSuite
 }

 private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
- // The original source should be a MANAGED table with empty view text and original text
- // The location of table should not be empty.
+ // The created table should be a MANAGED table with empty view text and original text.
 assert(targetTable.tableType == CatalogTableType.MANAGED,
 "the created table must be a Hive managed table")
 assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
 "the view text and original text in the created table must be empty")
+ // The location of the created table should not be empty: although Spark SQL does not set it
+ // when creating the table, Hive populates it.
+ assert(targetTable.storage.locationUri.nonEmpty,
+ "the location of created table should not be empty")
 assert(targetTable.comment.isEmpty,
 "the comment in the created table must be empty")

From 1f70f9ae2fdf2651889c574c0d29a99564685443 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 29 Aug 2016 20:13:41 -0700
Subject: [PATCH 14/17] do not copy unsupportedFeatures

---
 .../org/apache/spark/sql/execution/command/tables.scala | 6 ++----
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 ++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1206034a0aad..cab373e6f62b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -44,8 +44,7 @@ import org.apache.spark.util.Utils
 * are identical to the ones defined in the source table.
 *
 * The CatalogTable attributes copied from the source table are storage(inputFormat,
- * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames,
- * bucketSpec, unsupportedFeatures.
+ * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec.
From 1f70f9ae2fdf2651889c574c0d29a99564685443 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 29 Aug 2016 20:13:41 -0700
Subject: [PATCH 14/17] do not copy unsupportedFeatures

---
 .../org/apache/spark/sql/execution/command/tables.scala    | 6 ++----
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 ++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1206034a0aad..cab373e6f62b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -44,8 +44,7 @@ import org.apache.spark.util.Utils
 * are identical to the ones defined in the source table.
 *
 * The CatalogTable attributes copied from the source table are storage(inputFormat,
- * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames,
- * bucketSpec, unsupportedFeatures.
+ * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec.
 *
 * The syntax of using this command in SQL is:
 * {{{
@@ -87,8 +86,7 @@ case class CreateTableLikeCommand(
         schema = sourceTableDesc.schema,
         provider = sourceTableDesc.provider,
         partitionColumnNames = sourceTableDesc.partitionColumnNames,
-        bucketSpec = sourceTableDesc.bucketSpec,
-        unsupportedFeatures = sourceTableDesc.unsupportedFeatures)
+        bucketSpec = sourceTableDesc.bucketSpec)
 
     catalog.createTable(newTableDesc, ifNotExists)
     Seq.empty[Row]
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 01e20a528f95..03cde90ebca8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -790,6 +790,8 @@ class HiveDDLSuite
       "the location of the created table should not be empty")
     assert(targetTable.comment.isEmpty, "the comment in the created table must be empty")
+    assert(targetTable.unsupportedFeatures.isEmpty,
+      "the unsupportedFeatures in the created table must be empty")
 
     val metastoreGeneratedProperties = Seq(
       "CreateTime",
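The rationale for no longer copying unsupportedFeatures is that the clone is created fresh by Spark, so it cannot carry Hive features that Spark failed to parse from the source table. A rough spark-shell check against a patched build might look like this (the table names are placeholders, and "src_tab" stands for any pre-existing source table):

    import org.apache.spark.sql.catalyst.TableIdentifier

    // Clone an existing table; the clone is built from scratch by Spark.
    spark.sql("CREATE TABLE tgt_tab LIKE src_tab")

    val tgt = spark.sessionState.catalog
      .getTableMetadata(TableIdentifier("tgt_tab", Some("default")))
    // The new table carries no unsupported-feature markers of its own.
    assert(tgt.unsupportedFeatures.isEmpty)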
From ba1b69d886a8e3e4c206a92fdbcf603c02478f58 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 31 Aug 2016 09:30:04 -0700
Subject: [PATCH 15/17] fix

---
 .../spark/sql/execution/command/tables.scala | 34 +++++++++++++++----
 .../sql/hive/execution/HiveDDLSuite.scala    | 19 +++++++----
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index cab373e6f62b..10aa661b0fd7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
 import org.apache.spark.sql.execution.datasources.PartitioningUtils
+import org.apache.spark.sql.internal.HiveSerDe
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -43,8 +44,8 @@ import org.apache.spark.util.Utils
 * In the target table definition, the table comment is always empty but the column comments
 * are identical to the ones defined in the source table.
 *
- * The CatalogTable attributes copied from the source table are storage(inputFormat,
- * outputFormat, serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec.
+ * The CatalogTable attributes copied from the source table are storage(inputFormat, outputFormat,
+ * serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec.
 *
 * The syntax of using this command in SQL is:
 * {{{
@@ -65,6 +66,7 @@ case class CreateTableLikeCommand(
     }
 
     val sourceTableDesc = catalog.getTableMetadata(sourceTable)
+    val sourceTableType = sourceTableDesc.tableType
 
     val newSerdeProp =
       if (DDLUtils.isDatasourceTable(sourceTableDesc)) {
@@ -74,9 +76,29 @@ case class CreateTableLikeCommand(
       } else {
         sourceTableDesc.storage.properties
       }
-    val newStorage = sourceTableDesc.storage.copy(
-      locationUri = None,
-      properties = newSerdeProp)
+
+    // Storage format
+    val newStorage =
+      if (sourceTableType == CatalogTableType.VIEW) {
+        val defaultStorageType = sparkSession.conf.get("hive.default.fileformat", "textfile")
+        val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType)
+        CatalogStorageFormat(
+          locationUri = None,
+          inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
+            .orElse(Some("org.apache.hadoop.mapred.TextInputFormat")),
+          outputFormat = defaultHiveSerde.flatMap(_.outputFormat)
+            .orElse(Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
+          serde = None,
+          compressed = false,
+          properties = Map())
+      } else {
+        sourceTableDesc.storage.copy(
+          locationUri = None,
+          properties = newSerdeProp)
+      }
+
+    val newProvider =
+      if (sourceTableType == CatalogTableType.VIEW) Some("hive") else sourceTableDesc.provider
 
     val newTableDesc =
       CatalogTable(
@@ -84,7 +106,7 @@ case class CreateTableLikeCommand(
         tableType = CatalogTableType.MANAGED,
         storage = newStorage,
         schema = sourceTableDesc.schema,
-        provider = sourceTableDesc.provider,
+        provider = newProvider,
         partitionColumnNames = sourceTableDesc.partitionColumnNames,
         bucketSpec = sourceTableDesc.bucketSpec)
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index b018042d6ba4..eadfb7b0578e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -677,10 +677,6 @@ class HiveDDLSuite
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))
 
-        // Source table is a temporary view, which does not have a serde
-        // We always pick the default serde, which is LazySimpleSerDe
-        assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName))
-
         checkCreateTableLike(sourceTable, targetTable)
       }
     }
@@ -801,9 +797,6 @@ class HiveDDLSuite
     assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
     val targetTable = spark.sessionState.catalog.getTableMetadata(
       TableIdentifier(targetTabName, Some("default")))
-    // Source table is a view, which does not have a serde
-    // We always pick the default serde, which is LazySimpleSerDe
-    assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName))
 
     checkCreateTableLike(sourceView, targetTable)
   }
@@ -861,6 +854,18 @@ class HiveDDLSuite
         "the target table should be a Hive serde table")
     }
 
+    if (sourceTable.tableType == CatalogTableType.VIEW) {
+      // Source table is a temporary/permanent view, which does not have a serde, inputFormat,
+      // and outputFormat. The created target table uses the default storage formats and serde.
+      assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName))
+      assert(targetTable.storage.inputFormat == Option("org.apache.hadoop.mapred.TextInputFormat"))
+      assert(targetTable.storage.outputFormat ==
+        Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"))
+      // The source temporary/permanent view does not have a provider value. `hive` is used in
+      // the created table.
+      assert(targetTable.provider == Option("hive"))
+    }
+
     val sourceTablePath = getTablePath(sourceTable)
     val targetTablePath = getTablePath(targetTable)
     assert(targetTablePath.nonEmpty, "target table path should not be empty")
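The storage-format fallback introduced above resolves hive.default.fileformat through HiveSerDe.sourceToSerDe, with hard-coded text formats as a safety net. A sketch of what that resolution is expected to yield for the default "textfile" setting, using the one-argument form called in this patch (the comments state the expectation, not captured output):

    import org.apache.spark.sql.internal.HiveSerDe

    val defaultHiveSerde = HiveSerDe.sourceToSerDe("textfile")
    // Expected to resolve to the text formats the new assertions check,
    // with the patch's .orElse fallbacks covering a None result:
    //   inputFormat  -> org.apache.hadoop.mapred.TextInputFormat
    //   outputFormat -> org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    println(defaultHiveSerde.flatMap(_.inputFormat))
    println(defaultHiveSerde.flatMap(_.outputFormat))
    // The serde itself is left as None, so Hive falls back to LazySimpleSerDe,
    // which is what the test revisions in this patch assert.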
From 4ce96e62adaa28965fb7c85e246ce2e1c86eba60 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 31 Aug 2016 20:22:10 -0700
Subject: [PATCH 16/17] address comments

---
 .../spark/sql/execution/command/tables.scala | 40 ++++++++-----------
 .../sql/hive/execution/HiveDDLSuite.scala    | 20 +++++-----
 2 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 10aa661b0fd7..67b2329effb7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -66,39 +66,31 @@ case class CreateTableLikeCommand(
     }
 
     val sourceTableDesc = catalog.getTableMetadata(sourceTable)
-    val sourceTableType = sourceTableDesc.tableType
-
-    val newSerdeProp =
-      if (DDLUtils.isDatasourceTable(sourceTableDesc)) {
-        val newPath = catalog.defaultTablePath(targetTable)
-        sourceTableDesc.storage.properties.filterKeys(_.toLowerCase != "path") ++
-          Map("path" -> newPath)
-      } else {
-        sourceTableDesc.storage.properties
-      }
 
     // Storage format
     val newStorage =
-      if (sourceTableType == CatalogTableType.VIEW) {
-        val defaultStorageType = sparkSession.conf.get("hive.default.fileformat", "textfile")
-        val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType)
-        CatalogStorageFormat(
+      if (sourceTableDesc.tableType == CatalogTableType.VIEW) {
+        val newPath = catalog.defaultTablePath(targetTable)
+        CatalogStorageFormat.empty.copy(properties = Map("path" -> newPath))
+      } else if (DDLUtils.isDatasourceTable(sourceTableDesc)) {
+        val newPath = catalog.defaultTablePath(targetTable)
+        val newSerdeProp =
+          sourceTableDesc.storage.properties.filterKeys(_.toLowerCase != "path") ++
+            Map("path" -> newPath)
+        sourceTableDesc.storage.copy(
           locationUri = None,
-          inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
-            .orElse(Some("org.apache.hadoop.mapred.TextInputFormat")),
-          outputFormat = defaultHiveSerde.flatMap(_.outputFormat)
-            .orElse(Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
-          serde = None,
-          compressed = false,
-          properties = Map())
+          properties = newSerdeProp)
       } else {
         sourceTableDesc.storage.copy(
          locationUri = None,
-          properties = newSerdeProp)
+          properties = sourceTableDesc.storage.properties)
      }
 
-    val newProvider =
-      if (sourceTableType == CatalogTableType.VIEW) Some("hive") else sourceTableDesc.provider
+    val newProvider = if (sourceTableDesc.tableType == CatalogTableType.VIEW) {
+      Some(sparkSession.sessionState.conf.defaultDataSourceName)
+    } else {
+      sourceTableDesc.provider
+    }
 
     val newTableDesc =
       CatalogTable(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index eadfb7b0578e..1a2bcc5540ce 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.hive.execution
 import java.io.File
 
 import org.apache.hadoop.fs.Path
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.scalatest.BeforeAndAfterEach
 
 import org.apache.spark.internal.config._
@@ -846,7 +845,8 @@ class HiveDDLSuite
     assert(targetTable.properties.filterKeys(!metastoreGeneratedProperties.contains(_)).isEmpty,
       "the table properties of source tables should not be copied in the created table")
 
-    if (DDLUtils.isDatasourceTable(sourceTable)) {
+    if (DDLUtils.isDatasourceTable(sourceTable) ||
+        sourceTable.tableType == CatalogTableType.VIEW) {
       assert(DDLUtils.isDatasourceTable(targetTable),
         "the target table should be a data source table")
     } else {
@@ -855,15 +855,15 @@ class HiveDDLSuite
     }
 
     if (sourceTable.tableType == CatalogTableType.VIEW) {
-      // Source table is a temporary/permanent view, which does not have a serde, inputFormat,
-      // and outputFormat. The created target table uses the default storage formats and serde.
-      assert(targetTable.storage.serde == Option(classOf[LazySimpleSerDe].getCanonicalName))
-      assert(targetTable.storage.inputFormat == Option("org.apache.hadoop.mapred.TextInputFormat"))
+      // Source table is a temporary/permanent view, which does not have a provider, serde,
+      // inputFormat, and outputFormat. The created target table uses the default data source format.
+      assert(targetTable.storage.serde ==
+        Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
+      assert(targetTable.storage.inputFormat ==
+        Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"))
       assert(targetTable.storage.outputFormat ==
-        Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"))
-      // The source temporary/permanent view does not have a provider value. `hive` is used in
-      // the created table.
-      assert(targetTable.provider == Option("hive"))
+        Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
+      assert(targetTable.provider == Option(spark.sessionState.conf.defaultDataSourceName))
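With this revision, a view source takes its provider from spark.sql.sources.default instead of the hard-coded `hive`, which is why the assertions now expect the Parquet serde and input/output formats. A spark-shell sketch of the resulting behavior (the view and table names are illustrative, and the stock default data source is assumed to be parquet):

    import org.apache.spark.sql.catalyst.TableIdentifier

    // Clone a temporary view; the target now becomes a data source table.
    spark.range(10).createOrReplaceTempView("v")
    spark.sql("CREATE TABLE tab_from_view LIKE v")

    val t = spark.sessionState.catalog
      .getTableMetadata(TableIdentifier("tab_from_view", Some("default")))
    // Provider follows the session default ("parquet" unless overridden).
    assert(t.provider == Some(spark.sessionState.conf.defaultDataSourceName))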
From 4bcb306b3a3801e1bc76be14487e097c0c517b8f Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 31 Aug 2016 22:59:59 -0700
Subject: [PATCH 17/17] address comments.

---
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 1a2bcc5540ce..7f3d96de85ae 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -816,10 +816,6 @@ class HiveDDLSuite
       "the created table must be a Hive managed table")
     assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
       "the view text and original text in the created table must be empty")
-    // The location of the created table should not be empty. Although Spark SQL does not
-    // set it when creating the table, Hive populates it.
-    assert(targetTable.storage.locationUri.nonEmpty,
-      "the location of the created table should not be empty")
     assert(targetTable.comment.isEmpty, "the comment in the created table must be empty")
     assert(targetTable.unsupportedFeatures.isEmpty,
@@ -855,15 +851,11 @@ class HiveDDLSuite
     }
 
     if (sourceTable.tableType == CatalogTableType.VIEW) {
-      // Source table is a temporary/permanent view, which does not have a provider, serde,
-      // inputFormat, and outputFormat. The created target table uses the default data source format.
-      assert(targetTable.storage.serde ==
-        Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
-      assert(targetTable.storage.inputFormat ==
-        Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"))
-      assert(targetTable.storage.outputFormat ==
-        Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
+      // Source table is a temporary/permanent view, which does not have a provider. The created
+      // target table uses the default data source format.
       assert(targetTable.provider == Option(spark.sessionState.conf.defaultDataSourceName))
+    } else {
+      assert(targetTable.provider == sourceTable.provider)
     }
 
     val sourceTablePath = getTablePath(sourceTable)