[SPARK-17167][2.0][SQL] Issue Exceptions when Running Analyze Table on In-Memory Cataloged Tables

gatorsmile · hvanhovell · commit 2b32a442dfbc · 2016-08-25T14:38:41.000+02:00
### What changes were proposed in this pull request? Currently, `ANALYZE TABLE` is only supported for Hive-serde tables, so we should issue an exception in all other cases. When the table is a data source table, we already issue an exception. However, when the table is an in-memory cataloged table, no exception is issued. This PR issues an exception when the table is in-memory cataloged. For example, ```SQL CREATE TABLE tbl(a INT, b INT) USING parquet ``` `tbl` is a `SimpleCatalogRelation` when Hive support is not enabled. ### How was this patch tested? Added two test cases. One of them only improves test coverage for the case where the analyzed table is a data source table. Author: gatorsmile <gatorsmile@gmail.com> Closes #14781 from gatorsmile/analyzeInMemoryTable2.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
 
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, SimpleCatalogRelation}
 
 
 /**
@@ -41,7 +41,7 @@ case class AnalyzeTableCommand(tableName: String) extends RunnableCommand {
     val relation = EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdent))
 
     relation match {
-      case relation: CatalogRelation =>
+      case relation: CatalogRelation if !relation.isInstanceOf[SimpleCatalogRelation] =>
         val catalogTable: CatalogTable = relation.catalogTable
         // This method is mainly based on
         // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -395,6 +395,17 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     assert(catalog.getTableMetadata(tableIdent1) === expectedTable)
   }
 
+  test("Analyze in-memory cataloged tables(SimpleCatalogRelation)") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(a INT, b INT) USING parquet")
+      val e = intercept[AnalysisException] {
+        sql("ANALYZE TABLE tbl COMPUTE STATISTICS")
+      }.getMessage
+      assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+        "but 'tbl' is a SimpleCatalogRelation"))
+    }
+  }
+
   test("create table using") {
     val catalog = spark.sessionState.catalog
     withTable("tbl") {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -594,6 +594,21 @@ class HiveDDLSuite
     }
   }
 
+  test("Analyze data source tables(LogicalRelation)") {
+    withTable("t1") {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        spark.range(1).write.format("parquet").save(path)
+        sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+        val e = intercept[AnalysisException] {
+          sql("ANALYZE TABLE t1 COMPUTE STATISTICS")
+        }.getMessage
+        assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+          "but 't1' is a LogicalRelation"))
+      }
+    }
+  }
+
   test("desc table for data source table") {
     withTable("tab1") {
       val tabName = "tab1"