From cfbfefc07364506fbafea0d853786e81c93cdebd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 22 Aug 2016 17:19:14 +0800 Subject: [PATCH 1/3] Support ANALYZE TABLE on analyzable temporary table. --- .../sql/catalyst/catalog/SessionCatalog.scala | 83 ++++++++--- .../plans/logical/StatisticsSetter.scala | 25 ++++ .../command/AnalyzeTableCommand.scala | 132 ++++++++++-------- .../datasources/LogicalRelation.scala | 13 +- .../sql/execution/command/DDLSuite.scala | 38 +++++ .../spark/sql/hive/HiveSessionCatalog.scala | 3 +- .../spark/sql/hive/StatisticsSuite.scala | 16 ++- 7 files changed, 226 insertions(+), 84 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 62d0da076b5a..223ca6d6a5d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics, StatisticsSetter, SubqueryAlias} import org.apache.spark.sql.catalyst.util.StringUtils object SessionCatalog { @@ -74,7 +74,7 @@ class SessionCatalog( /** List of temporary tables, mapping from table name to their logical plan. */ @GuardedBy("this") - protected val tempTables = new mutable.HashMap[String, LogicalPlan] + protected val tempTables = new mutable.HashMap[String, TempTable] // Note: we track current database here because certain operations do not explicitly // specify the database (e.g. DROP TABLE my_table). 
In these cases we must first @@ -238,11 +238,18 @@ class SessionCatalog( def alterTable(tableDefinition: CatalogTable): Unit = { val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) val table = formatTableName(tableDefinition.identifier.table) - val tableIdentifier = TableIdentifier(table, Some(db)) - val newTableDefinition = tableDefinition.copy(identifier = tableIdentifier) - requireDbExists(db) - requireTableExists(tableIdentifier) - externalCatalog.alterTable(newTableDefinition) + val tableIdent = TableIdentifier(table) + if (isTemporaryTable(tableIdent)) { + val plan = tempTables(table).logicalPlan + tempTables.remove(table) + tempTables.put(table, TempTable(plan, tableDefinition)) + } else { + val tableIdentifier = TableIdentifier(table, Some(db)) + val newTableDefinition = tableDefinition.copy(identifier = tableIdentifier) + requireDbExists(db) + requireTableExists(tableIdentifier) + externalCatalog.alterTable(newTableDefinition) + } } /** @@ -255,13 +262,7 @@ class SessionCatalog( val table = formatTableName(name.table) val tid = TableIdentifier(table) if (isTemporaryTable(name)) { - CatalogTable( - identifier = tid, - tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat.empty, - schema = tempTables(table).output.toStructType, - properties = Map(), - viewText = None) + tempTables(table).metadata } else { requireDbExists(db) requireTableExists(TableIdentifier(table, Some(db))) @@ -341,7 +342,15 @@ class SessionCatalog( if (tempTables.contains(table) && !overrideIfExists) { throw new TempTableAlreadyExistsException(name) } - tempTables.put(table, tableDefinition) + val tid = TableIdentifier(table) + val metadata = CatalogTable( + identifier = tid, + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = tableDefinition.output.toStructType, + properties = Map(), + viewText = None) + tempTables.put(table, TempTable(tableDefinition, metadata)) } /** @@ -432,11 +441,28 @@ class SessionCatalog( } SubqueryAlias(relationAlias, SimpleCatalogRelation(db, metadata), view) } else { - SubqueryAlias(relationAlias, tempTables(table), Option(name)) + SubqueryAlias(relationAlias, lookupTempTable(table), Option(name)) } } } + /** + * Return a [[LogicalPlan]] that represents the given temporary table. + */ + protected def lookupTempTable(table: String): LogicalPlan = { + val tempTable = tempTables(table) + tempTable.logicalPlan match { + case s: StatisticsSetter => + val tableParameters = tempTable.metadata.properties + tableParameters.get("totalSize").foreach { size => + val stat = Statistics(sizeInBytes = BigInt(size)) + s.setStatistics(stat) + } + case o => o + } + tempTable.logicalPlan + } + /** * Return whether a table/view with the specified name exists. * @@ -465,6 +491,22 @@ class SessionCatalog( name.database.isEmpty && tempTables.contains(formatTableName(name.table)) } + /** + * Return whether a temporary table can be analyzed. + */ + def isAnalyzableTemporaryTable(name: TableIdentifier): Boolean = synchronized { + if (isTemporaryTable(name)) { + val table = formatTableName(name.table) + val tempTable = tempTables(table) + tempTable.logicalPlan match { + case s: StatisticsSetter => true + case o => false + } + } else { + false + } + } + /** * List all tables in the specified database, including temporary tables. */ @@ -493,7 +535,7 @@ class SessionCatalog( // If the database is defined, this is definitely not a temp table. // If the database is not defined, there is a good chance this is a temp table. 
if (name.database.isEmpty) { - tempTables.get(formatTableName(name.table)).foreach(_.refresh()) + tempTables.get(formatTableName(name.table)).foreach(_.logicalPlan.refresh()) } } @@ -510,7 +552,7 @@ class SessionCatalog( * For testing only. */ private[catalog] def getTempTable(name: String): Option[LogicalPlan] = synchronized { - tempTables.get(formatTableName(name)) + tempTables.get(formatTableName(name)).map(_.logicalPlan) } // ---------------------------------------------------------------------------- @@ -929,3 +971,8 @@ class SessionCatalog( } } + +/** + * The case class represents a temporary table + */ +private[sql] case class TempTable(logicalPlan: LogicalPlan, metadata: CatalogTable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala new file mode 100644 index 000000000000..a0f5518d19af --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.plans.logical + +/** + * A trait that is used to set statistics to [[LogicalPlan]]. + */ +private[sql] trait StatisticsSetter { + protected var _statistics: Option[Statistics] = None + private[sql] def setStatistics(stat: Statistics): Unit = _statistics = Option(stat) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index a469d4da8613..4e5dc832aac5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -22,8 +22,9 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} -import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable} +import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NoSuchTableException} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, SessionCatalog} /** @@ -34,73 +35,88 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable} * in the Hive metastore. 
*/ case class AnalyzeTableCommand(tableName: String) extends RunnableCommand { + private def getTable(catalog: SessionCatalog, name: TableIdentifier): CatalogTable = { + try { + catalog.getTableMetadata(name) + } catch { + case _: NoSuchTableException => + throw new AnalysisException(s"Target table in ANALYZE TABLE does not exist: $name") + } + } override def run(sparkSession: SparkSession): Seq[Row] = { val sessionState = sparkSession.sessionState + val catalog = sessionState.catalog val tableIdent = sessionState.sqlParser.parseTableIdentifier(tableName) + val catalogTable: CatalogTable = getTable(catalog, tableIdent) val relation = EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdent)) + val tableParameters = catalogTable.properties + val oldTotalSize = tableParameters.get("totalSize").map(_.toLong).getOrElse(0L) - relation match { - case relation: CatalogRelation => - val catalogTable: CatalogTable = relation.catalogTable - // This method is mainly based on - // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table) - // in Hive 0.13 (except that we do not use fs.getContentSummary). - // TODO: Generalize statistics collection. - // TODO: Why fs.getContentSummary returns wrong size on Jenkins? - // Can we use fs.getContentSummary in future? - // Seems fs.getContentSummary returns wrong table size on Jenkins. So we use - // countFileSize to count the table size. - val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") - - def calculateTableSize(fs: FileSystem, path: Path): Long = { - val fileStatus = fs.getFileStatus(path) - val size = if (fileStatus.isDirectory) { - fs.listStatus(path) - .map { status => - if (!status.getPath.getName.startsWith(stagingDir)) { - calculateTableSize(fs, status.getPath) - } else { - 0L - } - }.sum - } else { - fileStatus.getLen - } + val newTotalSize = + if (catalog.isAnalyzableTemporaryTable(tableIdent)) { + val rowCount = sparkSession.table(tableName).count() + val outputRowSize = relation.output.map(_.dataType.defaultSize).sum + rowCount * outputRowSize + } else { + relation match { + case relation: CatalogRelation => + // This method is mainly based on + // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table) + // in Hive 0.13 (except that we do not use fs.getContentSummary). + // TODO: Generalize statistics collection. + // TODO: Why fs.getContentSummary returns wrong size on Jenkins? + // Can we use fs.getContentSummary in future? + // Seems fs.getContentSummary returns wrong table size on Jenkins. So we use + // countFileSize to count the table size. 
+ val stagingDir = + sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") - size - } + def calculateTableSize(fs: FileSystem, path: Path): Long = { + val fileStatus = fs.getFileStatus(path) + val size = if (fileStatus.isDirectory) { + fs.listStatus(path) + .map { status => + if (!status.getPath.getName.startsWith(stagingDir)) { + calculateTableSize(fs, status.getPath) + } else { + 0L + } + }.sum + } else { + fileStatus.getLen + } - val tableParameters = catalogTable.properties - val oldTotalSize = tableParameters.get("totalSize").map(_.toLong).getOrElse(0L) - val newTotalSize = - catalogTable.storage.locationUri.map { p => - val path = new Path(p) - try { - val fs = path.getFileSystem(sparkSession.sessionState.newHadoopConf()) - calculateTableSize(fs, path) - } catch { - case NonFatal(e) => - logWarning( - s"Failed to get the size of table ${catalogTable.identifier.table} in the " + - s"database ${catalogTable.identifier.database} because of ${e.toString}", e) - 0L + size } - }.getOrElse(0L) - // Update the Hive metastore if the total size of the table is different than the size - // recorded in the Hive metastore. - // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats(). - if (newTotalSize > 0 && newTotalSize != oldTotalSize) { - sessionState.catalog.alterTable( - catalogTable.copy( - properties = relation.catalogTable.properties + - (AnalyzeTableCommand.TOTAL_SIZE_FIELD -> newTotalSize.toString))) - } + catalogTable.storage.locationUri.map { p => + val path = new Path(p) + try { + val fs = path.getFileSystem(sparkSession.sessionState.newHadoopConf()) + calculateTableSize(fs, path) + } catch { + case NonFatal(e) => + logWarning( + s"Failed to get the size of table ${catalogTable.identifier.table} in the " + + s"database ${catalogTable.identifier.database} because of ${e.toString}", e) + 0L + } + }.getOrElse(0L) - case otherRelation => - throw new AnalysisException(s"ANALYZE TABLE is only supported for Hive tables, " + - s"but '${tableIdent.unquotedString}' is a ${otherRelation.nodeName}.") + case otherRelation => + throw new AnalysisException(s"ANALYZE TABLE is only supported for Hive tables, " + + s"but '${tableIdent.unquotedString}' is a ${otherRelation.nodeName}.") + } + } + // Update the table properties if the total size of the table is different than the size + // recorded in the catalog. + // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats(). 
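// (Illustrative note, not part of the patch: for an analyzable temporary view the alterTable
//  call below takes the new in-memory branch added to SessionCatalog.alterTable earlier in this
//  patch, so the totalSize property is kept in the TempTable metadata rather than written to the
//  external catalog, and is later read back by lookupTempTable to set Statistics on the
//  underlying LogicalRelation.)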
+ if (newTotalSize > 0 && newTotalSize != oldTotalSize) { + sessionState.catalog.alterTable( + catalogTable.copy( + properties = catalogTable.properties + + (AnalyzeTableCommand.TOTAL_SIZE_FIELD -> newTotalSize.toString))) } Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 90711f2b1dde..6a1f13fc432e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics, StatisticsSetter} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.Utils @@ -34,7 +34,7 @@ case class LogicalRelation( relation: BaseRelation, expectedOutputAttributes: Option[Seq[Attribute]] = None, metastoreTableIdentifier: Option[TableIdentifier] = None) - extends LeafNode with MultiInstanceRelation { + extends LeafNode with MultiInstanceRelation with StatisticsSetter { override val output: Seq[AttributeReference] = { val attrs = relation.schema.toAttributes @@ -72,9 +72,12 @@ case class LogicalRelation( // expId can be different but the relation is still the same. override lazy val cleanArgs: Seq[Any] = Seq(relation) - @transient override lazy val statistics: Statistics = Statistics( - sizeInBytes = BigInt(relation.sizeInBytes) - ) + @transient override def statistics: Statistics = + _statistics.getOrElse { + Statistics( + sizeInBytes = BigInt(relation.sizeInBytes) + ) + } /** Used to lookup original attribute capitalization */ val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 0f7fda7666a3..f72feec8bcce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1658,6 +1658,44 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } + test("Run analyze table command on temporary view which is analyzable") { + withTempPath { path => + val dataPath = path.getCanonicalPath + spark.range(10).write.format("parquet").save(dataPath) + withView("view1") { + // Create a temporary view which is a [[LogicalRelation]] wrapping a datasource relation. + sql(s"CREATE TEMPORARY VIEW view1 USING parquet OPTIONS(path '$dataPath')") + + val df = spark.sessionState.catalog.lookupRelation(TableIdentifier("view1")) + + // Before ANALYZE TABLE, HadoopFsRelation calculates statistics from file sizes. 
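// (Illustrative arithmetic, not part of the patch: spark.range(10) has a single LongType
//  column `id`, and LongType.defaultSize is 8 bytes, so the estimate written by ANALYZE TABLE
//  is 10 rows * 8 bytes = 80 bytes; the pre-ANALYZE estimate exceeds 80 because it is the
//  on-disk Parquet size, which includes encoding and footer overhead.)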
+ val sizeBefore = + spark.sessionState.catalog.lookupRelation(TableIdentifier("view1")).statistics.sizeInBytes + assert(sizeBefore > 80) + + sql("ANALYZE TABLE view1 COMPUTE STATISTICS") + + val sizeAfter = + spark.sessionState.catalog.lookupRelation(TableIdentifier("view1")).statistics.sizeInBytes + assert(sizeAfter == 80) + } + } + } + + test("Run analyze table command on temporary view which is not analyzable") { + withTable("tab1") { + spark.range(10).write.saveAsTable("tab1") + withView("view1") { + // Create a temporary view which is represented as a query plan. + sql("CREATE TEMPORARY VIEW view1 (col1) AS SELECT * FROM tab1") + val df = spark.sessionState.catalog.lookupRelation(TableIdentifier("view1")) + intercept[AnalysisException] { + sql("ANALYZE TABLE view1 COMPUTE STATISTICS") + } + } + } + } + test("create temporary view with mismatched schema") { withTable("tab1") { spark.range(10).write.saveAsTable("tab1") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index ebed9eb6e7dc..762ec7d77e3e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -67,8 +67,7 @@ private[sql] class HiveSessionCatalog( val newName = name.copy(database = database, table = table) metastoreCatalog.lookupRelation(newName, alias) } else { - val relation = tempTables(table) - val tableWithQualifiers = SubqueryAlias(table, relation, None) + val tableWithQualifiers = SubqueryAlias(table, lookupTempTable(table), None) // If an alias was specified by the lookup, wrap the plan in a subquery so that // attributes are properly qualified with this alias. alias.map(a => SubqueryAlias(a, tableWithQualifiers, None)).getOrElse(tableWithQualifiers) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index b275ab17a93c..ed8e27eb7ac4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -159,13 +159,27 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils sql("DROP TABLE analyzeTable_part").collect() - // Try to analyze a temp table + // Try to analyze a temp table which is not analyzable. sql("""SELECT * FROM src""").createOrReplaceTempView("tempTable") intercept[AnalysisException] { sql("ANALYZE TABLE tempTable COMPUTE STATISTICS") } spark.sessionState.catalog.dropTable( TableIdentifier("tempTable"), ignoreIfNotExists = true, purge = false) + + withTempPath { path => + val dataPath = path.getCanonicalPath + spark.range(10).write.format("parquet").save(dataPath) + withView("view1") { + // Create a temporary view which is a [[LogicalRelation]] wrapping a datasource relation. + sql(s"CREATE TEMPORARY VIEW view1 USING parquet OPTIONS(path '$dataPath')") + val df = spark.sessionState.catalog.lookupRelation(TableIdentifier("view1")) + // Before ANALYZE TABLE, HadoopFsRelation calculates statistics from file sizes. 
+ assert(queryTotalSize("view1") > 80) + sql("ANALYZE TABLE view1 COMPUTE STATISTICS") + assert(queryTotalSize("view1") == 80) + } + } } test("estimates the size of a test MetastoreRelation") { From 29e22fe0a50163b50f75f5d21ccbf52a28777cd3 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 6 Sep 2016 16:51:19 +0800 Subject: [PATCH 2/3] Remove unnecessary change. --- .../spark/sql/execution/command/AnalyzeTableCommand.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index 4c6a53d2ac87..b2146cff479b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -56,8 +56,7 @@ case class AnalyzeTableCommand(tableName: String, noscan: Boolean = true) extend // Can we use fs.getContentSummary in future? // Seems fs.getContentSummary returns wrong table size on Jenkins. So we use // countFileSize to count the table size. - val stagingDir = - sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") + val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") def calculateTableSize(fs: FileSystem, path: Path): Long = { val fileStatus = fs.getFileStatus(path) From 6a3aff2c0e15d49e578a211586145f210cfa3ee3 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 6 Sep 2016 17:51:54 +0800 Subject: [PATCH 3/3] Remove StatisticsSetter. --- .../plans/logical/StatisticsSetter.scala | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala deleted file mode 100644 index a0f5518d19af..000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/StatisticsSetter.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.catalyst.plans.logical - -/** - * A trait that is used to set statistics to [[LogicalPlan]]. - */ -private[sql] trait StatisticsSetter { - protected var _statistics: Option[Statistics] = None - private[sql] def setStatistics(stat: Statistics): Unit = _statistics = Option(stat) -}
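As a usage sketch of the behaviour this series enables (not part of the patches; the application name, view name, and output path are illustrative), the flow mirrors the added DDLSuite test: back a temporary view by Parquet so it resolves to an analyzable LogicalRelation, run ANALYZE TABLE on it, and observe the size estimate drop from the on-disk Parquet size to row count times per-row size. Statistics are read here through Dataset.queryExecution rather than the session catalog used by the tests.

import org.apache.spark.sql.SparkSession

object AnalyzeTempViewDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("analyze-temp-view-demo")
      .getOrCreate()

    // A temporary view backed by Parquet resolves to a LogicalRelation, which is the
    // "analyzable" case this series targets; a plain CREATE TEMPORARY VIEW ... AS SELECT
    // view remains unsupported and still raises AnalysisException.
    val dataPath = "/tmp/analyze_temp_view_demo"  // illustrative path
    spark.range(10).write.mode("overwrite").parquet(dataPath)
    spark.sql(s"CREATE TEMPORARY VIEW demo_view USING parquet OPTIONS (path '$dataPath')")

    def sizeInBytes: BigInt =
      spark.table("demo_view").queryExecution.analyzed.statistics.sizeInBytes

    val before = sizeInBytes            // file-size-based estimate, expected > 80 bytes
    spark.sql("ANALYZE TABLE demo_view COMPUTE STATISTICS")
    val after = sizeInBytes             // 10 rows * 8 bytes (LongType) = 80 bytes

    println(s"sizeInBytes before=$before, after=$after")
    spark.stop()
  }
}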