
Commit 1e14765

wakun authored and GitHub Enterprise committed
[CARMEL-6055] Backport [SPARK-29375][SPARK-28940][SPARK-32041][SQL] Whole plan exchange and subquery reuse (#993)
* [CARMEL-6055] Backport [SPARK-29375][SPARK-35855][SPARK-28940][SPARK-32041][SQL] Whole plan exchange and subquery reuse
* fix ut
* fix ut
* fix ut
* fix ut
* fix ut
1 parent 25d0233 commit 1e14765
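
In short, this backport replaces the separate ReuseExchange and ReuseSubquery preparation rules with a single whole-plan rule, ReuseExchangeAndSubquery, appended once at the end of physical planning, and reworks ExplainUtils so operator IDs are assigned across the main plan and all of its subqueries before anything is printed.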

File tree

12 files changed: +444 -173 lines

sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala

Lines changed: 86 additions & 72 deletions
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.execution
 
-import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.sql.AnalysisException
@@ -28,35 +27,22 @@ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveS
 object ExplainUtils extends AdaptiveSparkPlanHelper {
   /**
    * Given a input physical plan, performs the following tasks.
-   *   1. Computes the operator id for current operator and records it in the operaror
-   *      by setting a tag.
-   *   2. Computes the whole stage codegen id for current operator and records it in the
+   *   1. Computes the whole stage codegen id for current operator and records it in the
    *      operator by setting a tag.
-   *   3. Generate the two part explain output for this plan.
+   *   2. Generate the two part explain output for this plan.
    *      1. First part explains the operator tree with each operator tagged with an unique
    *         identifier.
-   *      2. Second part explans each operator in a verbose manner.
+   *      2. Second part explains each operator in a verbose manner.
    *
    * Note : This function skips over subqueries. They are handled by its caller.
    *
    * @param plan Input query plan to process
    * @param append function used to append the explain output
-   * @param startOperatorID The start value of operation id. The subsequent operations will
-   *                        be assigned higher value.
-   *
-   * @return The last generated operation id for this input plan. This is to ensure we
-   *         always assign incrementing unique id to each operator.
-   *
    */
   private def processPlanSkippingSubqueries[T <: QueryPlan[T]](
-      plan: => QueryPlan[T],
-      append: String => Unit,
-      startOperatorID: Int): Int = {
-
-    val operationIDs = new mutable.ArrayBuffer[(Int, QueryPlan[_])]()
-    var currentOperatorID = startOperatorID
+      plan: T,
+      append: String => Unit): Unit = {
     try {
-      currentOperatorID = generateOperatorIDs(plan, currentOperatorID, operationIDs)
       generateWholeStageCodegenIds(plan)
 
       QueryPlan.append(
@@ -67,31 +53,36 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
         printOperatorId = true)
 
       append("\n")
-      var i: Integer = 0
-      for ((opId, curPlan) <- operationIDs) {
-        append(curPlan.verboseStringWithOperatorId())
-      }
+
+      val operationsWithID = ArrayBuffer.empty[QueryPlan[_]]
+      collectOperatorsWithID(plan, operationsWithID)
+      operationsWithID.foreach(p => append(p.verboseStringWithOperatorId()))
+
     } catch {
       case e: AnalysisException => append(e.toString)
     }
-    currentOperatorID
   }
 
   /**
    * Given a input physical plan, performs the following tasks.
    *   1. Generates the explain output for the input plan excluding the subquery plans.
    *   2. Generates the explain output for each subquery referenced in the plan.
    */
-  def processPlan[T <: QueryPlan[T]](
-      plan: => QueryPlan[T],
-      append: String => Unit): Unit = {
+  def processPlan[T <: QueryPlan[T]](plan: T, append: String => Unit): Unit = {
     try {
-      val subqueries = ArrayBuffer.empty[(SparkPlan, Expression, BaseSubqueryExec)]
       var currentOperatorID = 0
-      currentOperatorID = processPlanSkippingSubqueries(plan, append, currentOperatorID)
+      currentOperatorID = generateOperatorIDs(plan, currentOperatorID)
+
+      val subqueries = ArrayBuffer.empty[(SparkPlan, Expression, BaseSubqueryExec)]
       getSubqueries(plan, subqueries)
-      var i = 0
 
+      subqueries.foldLeft(currentOperatorID) {
+        (curId, plan) => generateOperatorIDs(plan._3.child, curId)
+      }
+
+      processPlanSkippingSubqueries(plan, append)
+
+      var i = 0
       for (sub <- subqueries) {
         if (i == 0) {
           append("\n===== Subqueries =====\n\n")
@@ -104,10 +95,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
       // the explain output. In case of subquery reuse, we don't print subquery plan more
       // than once. So we skip [[ReusedSubqueryExec]] here.
       if (!sub._3.isInstanceOf[ReusedSubqueryExec]) {
-        currentOperatorID = processPlanSkippingSubqueries(
-          sub._3.child,
-          append,
-          currentOperatorID)
+        processPlanSkippingSubqueries(sub._3.child, append)
       }
       append("\n")
     }
@@ -117,59 +105,85 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
   }
 
   /**
-   * Traverses the supplied input plan in a bottem-up fashion does the following :
-   *    1. produces a map : operator identifier -> operator
-   *    2. Records the operator id via setting a tag in the operator.
+   * Traverses the supplied input plan in a bottom-up fashion and records the operator id via
+   * setting a tag in the operator.
    * Note :
-   *    1. Operator such as WholeStageCodegenExec and InputAdapter are skipped as they don't
-   *       appear in the explain output.
-   *    2. operator identifier starts at startOperatorID + 1
+   * - Operator such as WholeStageCodegenExec and InputAdapter are skipped as they don't
+   *   appear in the explain output.
+   * - Operator identifier starts at startOperatorID + 1
+   *
    * @param plan Input query plan to process
-   * @param startOperatorID The start value of operation id. The subsequent operations will
-   *                        be assigned higher value.
-   * @param operatorIDs A output parameter that contains a map of operator id and query plan. This
-   *                    is used by caller to print the detail portion of the plan.
-   * @return The last generated operation id for this input plan. This is to ensure we
-   *         always assign incrementing unique id to each operator.
+   * @param startOperatorID The start value of operation id. The subsequent operations will be
+   *                        assigned higher value.
+   * @return The last generated operation id for this input plan. This is to ensure we always
+   *         assign incrementing unique id to each operator.
    */
-  private def generateOperatorIDs(
-      plan: QueryPlan[_],
-      startOperatorID: Int,
-      operatorIDs: mutable.ArrayBuffer[(Int, QueryPlan[_])]): Int = {
+  private def generateOperatorIDs(plan: QueryPlan[_], startOperatorID: Int): Int = {
     var currentOperationID = startOperatorID
     // Skip the subqueries as they are not printed as part of main query block.
     if (plan.isInstanceOf[BaseSubqueryExec]) {
       return currentOperationID
     }
-    plan.foreachUp {
-      case p: WholeStageCodegenExec =>
-      case p: InputAdapter =>
-      case other: QueryPlan[_] =>
 
-        def setOpId(): Unit = if (other.getTagValue(QueryPlan.OP_ID_TAG).isEmpty) {
-          currentOperationID += 1
-          other.setTagValue(QueryPlan.OP_ID_TAG, currentOperationID)
-          operatorIDs += ((currentOperationID, other))
-        }
+    def setOpId(plan: QueryPlan[_]): Unit = if (plan.getTagValue(QueryPlan.OP_ID_TAG).isEmpty) {
+      currentOperationID += 1
+      plan.setTagValue(QueryPlan.OP_ID_TAG, currentOperationID)
+    }
 
-        other match {
-          case p: AdaptiveSparkPlanExec =>
-            currentOperationID =
-              generateOperatorIDs(p.executedPlan, currentOperationID, operatorIDs)
-            setOpId()
-          case p: QueryStageExec =>
-            currentOperationID = generateOperatorIDs(p.plan, currentOperationID, operatorIDs)
-            setOpId()
-          case _ =>
-            setOpId()
-            other.innerChildren.foldLeft(currentOperationID) {
-              (curId, plan) => generateOperatorIDs(plan, curId, operatorIDs)
-            }
+    plan.foreachUp {
+      case _: WholeStageCodegenExec =>
+      case _: InputAdapter =>
+      case p: AdaptiveSparkPlanExec =>
+        currentOperationID = generateOperatorIDs(p.executedPlan, currentOperationID)
+        setOpId(p)
+      case p: QueryStageExec =>
+        currentOperationID = generateOperatorIDs(p.plan, currentOperationID)
+        setOpId(p)
+      case other: QueryPlan[_] =>
+        setOpId(other)
+        other.innerChildren.foldLeft(currentOperationID) {
+          (curId, plan) => generateOperatorIDs(plan, curId)
        }
     }
    currentOperationID
  }
 
+  /**
+   * Traverses the supplied input plan in a bottom-up fashion and collects operators with assigned
+   * ids.
+   *
+   * @param plan Input query plan to process
+   * @param operators An output parameter that contains the operators.
+   */
+  private def collectOperatorsWithID(
+      plan: QueryPlan[_],
+      operators: ArrayBuffer[QueryPlan[_]]): Unit = {
+    // Skip the subqueries as they are not printed as part of main query block.
+    if (plan.isInstanceOf[BaseSubqueryExec]) {
+      return
+    }
+
+    def collectOperatorWithID(plan: QueryPlan[_]): Unit = {
+      if (plan.getTagValue(QueryPlan.OP_ID_TAG).isDefined) {
+        operators += plan
+      }
+    }
+
+    plan.foreachUp {
+      case _: WholeStageCodegenExec =>
+      case _: InputAdapter =>
+      case p: AdaptiveSparkPlanExec =>
+        collectOperatorsWithID(p.executedPlan, operators)
+        collectOperatorWithID(p)
+      case p: QueryStageExec =>
+        collectOperatorsWithID(p.plan, operators)
+        collectOperatorWithID(p)
+      case other: QueryPlan[_] =>
+        collectOperatorWithID(other)
+        other.innerChildren.foreach(collectOperatorsWithID(_, operators))
+    }
+  }
+
   /**
    * Traverses the supplied input plan in a top-down fashion and records the
    * whole stage code gen id in the plan via setting a tag.
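
The net effect in ExplainUtils is that ID assignment (generateOperatorIDs) is now separated from collecting what gets printed (collectOperatorsWithID), so processPlan can number the main plan and every subquery before either is rendered. For orientation, a minimal caller sketch, not part of this diff (the helper name and StringBuilder wiring are illustrative):

  import org.apache.spark.sql.execution.{ExplainUtils, SparkPlan}

  // Hypothetical helper: render the two-part formatted explain of a physical plan.
  def formattedExplain(plan: SparkPlan): String = {
    val sb = new StringBuilder()
    // processPlan assigns operator IDs to the main plan and to all subquery plans
    // first, then prints the operator tree followed by the per-operator details.
    ExplainUtils.processPlan(plan, (s: String) => { sb.append(s); () })
    sb.toString
  }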

sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala

Lines changed: 13 additions & 8 deletions
@@ -35,7 +35,9 @@ import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, EnsureRepartitionForWriting, InsertAdaptiveSparkPlan}
 import org.apache.spark.sql.execution.bucketing.DisableUnnecessaryBucketedScan
 import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters
-import org.apache.spark.sql.execution.exchange.{EliminateShuffleExec, EnsureRequirements, ReuseExchange}
+import org.apache.spark.sql.execution.exchange.EliminateShuffleExec
+import org.apache.spark.sql.execution.exchange.EnsureRequirements
+import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
 import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata}
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.streaming.OutputMode
@@ -147,7 +149,7 @@ class QueryExecution(
 
   protected def preparations: Seq[Rule[SparkPlan]] = {
     QueryExecution.preparations(sparkSession,
-      Option(InsertAdaptiveSparkPlan(AdaptiveExecutionContext(sparkSession, this))))
+      Option(InsertAdaptiveSparkPlan(AdaptiveExecutionContext(sparkSession, this))), false)
   }
 
   private def executePhase[T](phase: String)(block: => T): T = sparkSession.withActive {
@@ -323,7 +325,8 @@ object QueryExecution {
   */
  private[execution] def preparations(
      sparkSession: SparkSession,
-      adaptiveExecutionRule: Option[InsertAdaptiveSparkPlan] = None): Seq[Rule[SparkPlan]] = {
+      adaptiveExecutionRule: Option[InsertAdaptiveSparkPlan] = None,
+      subquery: Boolean): Seq[Rule[SparkPlan]] = {
     // `AdaptiveSparkPlanExec` is a leaf node. If inserted, all the following rules will be no-op
     // as the original plan is hidden behind `AdaptiveSparkPlanExec`.
     adaptiveExecutionRule.toSeq ++
@@ -339,10 +342,12 @@ object QueryExecution {
       EliminateShuffleExec,
       DisableUnnecessaryBucketedScan,
       ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules),
-      CollapseCodegenStages(),
-      ReuseExchange,
-      ReuseSubquery
-    )
+      CollapseCodegenStages()) ++
+      (if (subquery) {
+        Nil
+      } else {
+        Seq(ReuseExchangeAndSubquery)
+      })
   }
 
   /**
@@ -373,7 +378,7 @@ object QueryExecution {
    * Prepare the [[SparkPlan]] for execution.
    */
   def prepareExecutedPlan(spark: SparkSession, plan: SparkPlan): SparkPlan = {
-    prepareForExecution(preparations(spark), plan)
+    prepareForExecution(preparations(spark, subquery = true), plan)
   }
 
   /**
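
The new subquery parameter exists so that plans prepared for a stand-alone subquery skip the whole-plan reuse rule, which only makes sense when applied to the complete query. Roughly, the two call sites in this diff behave as annotated below (a restatement for orientation, not code from the commit; `this` refers to the enclosing QueryExecution):

  // Inside QueryExecution#preparations (main query): subquery = false, so the
  // rule list ends with ... ++ Seq(ReuseExchangeAndSubquery).
  QueryExecution.preparations(sparkSession,
    Option(InsertAdaptiveSparkPlan(AdaptiveExecutionContext(sparkSession, this))), false)

  // Inside QueryExecution.prepareExecutedPlan (stand-alone subquery plans):
  // subquery = true, so reuse is skipped here, because ReuseExchangeAndSubquery
  // has already handled it at the whole-plan level.
  QueryExecution.preparations(spark, subquery = true)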

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala

Lines changed: 3 additions & 1 deletion
@@ -72,7 +72,9 @@ case class HashAggregateExec(
   // This is for testing. We force TungstenAggregationIterator to fall back to the unsafe row hash
   // map and/or the sort-based aggregation once it has processed a given number of input rows.
   private val testFallbackStartsAt: Option[(Int, Int)] = {
-    sqlContext.getConf("spark.sql.TungstenAggregate.testFallbackStartsAt", null) match {
+    Option(sqlContext).map { s =>
+      s.getConf("spark.sql.TungstenAggregate.testFallbackStartsAt", null)
+    }.orNull match {
       case null | "" => None
       case fallbackStartsAt =>
         val splits = fallbackStartsAt.split(",").map(_.trim)
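
The HashAggregateExec change guards against sqlContext being null (for example on a plan copy that is not attached to an active session) while preserving the old behaviour where null or an empty string means the conf is unset. A self-contained sketch of the same pattern, with a hypothetical stand-in for sqlContext:

  // Hypothetical stand-in for sqlContext; callers may legitimately pass null.
  final class FakeContext(settings: Map[String, String]) {
    def getConf(key: String, default: String): String = settings.getOrElse(key, default)
  }

  def testFallbackStartsAt(ctx: FakeContext): Option[String] = {
    // Option(ctx) absorbs a null context instead of throwing a NullPointerException,
    // and .orNull restores the plain (possibly null) String expected by the match.
    Option(ctx)
      .map(_.getConf("spark.sql.TungstenAggregate.testFallbackStartsAt", null))
      .orNull match {
      case null | "" => None
      case value => Some(value)
    }
  }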

sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala

Lines changed: 0 additions & 48 deletions
@@ -95,51 +95,3 @@ case class ReusedExchangeExec(override val output: Seq[Attribute], child: Exchan
      |""".stripMargin
   }
 }
-
-/**
- * Find out duplicated exchanges in the spark plan, then use the same exchange for all the
- * references.
- */
-object ReuseExchange extends Rule[SparkPlan] {
-
-  def apply(plan: SparkPlan): SparkPlan = {
-    if (!conf.exchangeReuseEnabled) {
-      return plan
-    }
-    // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls.
-    val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]()
-
-    // Replace a Exchange duplicate with a ReusedExchange
-    def reuse: PartialFunction[Exchange, SparkPlan] = {
-      case exchange: Exchange =>
-        val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]())
-        val samePlan = sameSchema.find { e =>
-          exchange.sameResult(e)
-        }
-        if (samePlan.isDefined) {
-          // Keep the output of this exchange, the following plans require that to resolve
-          // attributes.
-          ReusedExchangeExec(exchange.output, samePlan.get)
-        } else {
-          sameSchema += exchange
-          exchange
-        }
-    }
-
-    plan transformUp {
-      case exchange: Exchange => reuse(exchange)
-    } transformAllExpressions {
-      // Lookup inside subqueries for duplicate exchanges
-      case in: InSubqueryExec =>
-        val newIn = in.plan.transformUp {
-          case exchange: Exchange => reuse(exchange)
-        }
-        in.copy(plan = newIn.asInstanceOf[BaseSubqueryExec])
-      case in: InBloomFilterSubqueryExec =>
-        val newIn = in.plan.transformUp {
-          case exchange: Exchange => reuse(exchange)
-        }
-        in.copy(plan = newIn.asInstanceOf[BaseSubqueryExec])
-    }
-  }
-}
}
