1717
1818package org .apache .spark .sql .execution
1919
20- import scala .collection .mutable
2120import scala .collection .mutable .ArrayBuffer
2221
2322import org .apache .spark .sql .AnalysisException
@@ -28,35 +27,22 @@ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveS
2827object ExplainUtils extends AdaptiveSparkPlanHelper {
2928 /**
3029 * Given an input physical plan, performs the following tasks.
31- * 1. Computes the operator id for current operator and records it in the operaror
32- * by setting a tag.
33- * 2. Computes the whole stage codegen id for current operator and records it in the
30+ * 1. Computes the whole stage codegen id for current operator and records it in the
3431 * operator by setting a tag.
35- * 3 . Generate the two part explain output for this plan.
32+ * 2 . Generate the two part explain output for this plan.
3633 * 1. First part explains the operator tree with each operator tagged with an unique
3734 * identifier.
38- * 2. Second part explans each operator in a verbose manner.
35+ * 2. Second part explains each operator in a verbose manner.
3936 *
4037 * Note : This function skips over subqueries. They are handled by its caller.
4138 *
4239 * @param plan Input query plan to process
4340 * @param append function used to append the explain output
44- * @param startOperatorID The start value of operation id. The subsequent operations will
45- * be assigned higher value.
46- *
47- * @return The last generated operation id for this input plan. This is to ensure we
48- * always assign incrementing unique id to each operator.
49- *
5041 */
5142 private def processPlanSkippingSubqueries [T <: QueryPlan [T ]](
52- plan : => QueryPlan [T ],
53- append : String => Unit ,
54- startOperatorID : Int ): Int = {
55-
56- val operationIDs = new mutable.ArrayBuffer [(Int , QueryPlan [_])]()
57- var currentOperatorID = startOperatorID
43+ plan : T ,
44+ append : String => Unit ): Unit = {
5845 try {
59- currentOperatorID = generateOperatorIDs(plan, currentOperatorID, operationIDs)
6046 generateWholeStageCodegenIds(plan)
6147
6248 QueryPlan .append(
@@ -67,31 +53,36 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
6753 printOperatorId = true )
6854
6955 append(" \n " )
70- var i : Integer = 0
71- for ((opId, curPlan) <- operationIDs) {
72- append(curPlan.verboseStringWithOperatorId())
73- }
56+
57+ val operationsWithID = ArrayBuffer .empty[QueryPlan [_]]
58+ collectOperatorsWithID(plan, operationsWithID)
59+ operationsWithID.foreach(p => append(p.verboseStringWithOperatorId()))
60+
7461 } catch {
7562 case e : AnalysisException => append(e.toString)
7663 }
77- currentOperatorID
7864 }
7965
8066 /**
8167 * Given an input physical plan, performs the following tasks.
8268 * 1. Generates the explain output for the input plan excluding the subquery plans.
8369 * 2. Generates the explain output for each subquery referenced in the plan.
8470 */
85- def processPlan [T <: QueryPlan [T ]](
86- plan : => QueryPlan [T ],
87- append : String => Unit ): Unit = {
71+ def processPlan [T <: QueryPlan [T ]](plan : T , append : String => Unit ): Unit = {
8872 try {
89- val subqueries = ArrayBuffer .empty[(SparkPlan , Expression , BaseSubqueryExec )]
9073 var currentOperatorID = 0
91- currentOperatorID = processPlanSkippingSubqueries(plan, append, currentOperatorID)
74+ currentOperatorID = generateOperatorIDs(plan, currentOperatorID)
75+
76+ val subqueries = ArrayBuffer .empty[(SparkPlan , Expression , BaseSubqueryExec )]
9277 getSubqueries(plan, subqueries)
93- var i = 0
9478
79+ subqueries.foldLeft(currentOperatorID) {
80+ (curId, plan) => generateOperatorIDs(plan._3.child, curId)
81+ }
82+
83+ processPlanSkippingSubqueries(plan, append)
84+
85+ var i = 0
9586 for (sub <- subqueries) {
9687 if (i == 0 ) {
9788 append(" \n ===== Subqueries =====\n\n " )
@@ -104,10 +95,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
10495 // the explain output. In case of subquery reuse, we don't print subquery plan more
10596 // than once. So we skip [[ReusedSubqueryExec]] here.
10697 if (! sub._3.isInstanceOf [ReusedSubqueryExec ]) {
107- currentOperatorID = processPlanSkippingSubqueries(
108- sub._3.child,
109- append,
110- currentOperatorID)
98+ processPlanSkippingSubqueries(sub._3.child, append)
11199 }
112100 append(" \n " )
113101 }
@@ -117,59 +105,85 @@ object ExplainUtils extends AdaptiveSparkPlanHelper {
117105 }
118106
/**
 * Traverses the supplied input plan in a bottom-up fashion and records the operator id via
 * setting a tag in the operator.
 * Note :
 * - Operators such as WholeStageCodegenExec and InputAdapter are skipped as they don't
 *   appear in the explain output.
 * - Operator identifiers start at startOperatorID + 1.
 * - An operator that already carries an id tag keeps it; no new id is consumed for it.
 * - Ids handed out while descending into AdaptiveSparkPlanExec, QueryStageExec and the
 *   inner children of any other operator all advance the same counter, so operators
 *   visited afterwards continue from the last id that was assigned.
 *
 * @param plan Input query plan to process
 * @param startOperatorID The start value of operation id. The subsequent operations will be
 *                        assigned higher value.
 * @return The last generated operation id for this input plan. This is to ensure we always
 *         assign incrementing unique id to each operator.
 */
private def generateOperatorIDs(plan: QueryPlan[_], startOperatorID: Int): Int = {
  var currentOperationID = startOperatorID
  // Skip the subqueries as they are not printed as part of main query block.
  if (plan.isInstanceOf[BaseSubqueryExec]) {
    return currentOperationID
  }

  // Tags `node` with the next id unless it has already been tagged.
  def setOpId(node: QueryPlan[_]): Unit = if (node.getTagValue(QueryPlan.OP_ID_TAG).isEmpty) {
    currentOperationID += 1
    node.setTagValue(QueryPlan.OP_ID_TAG, currentOperationID)
  }

  plan.foreachUp {
    case _: WholeStageCodegenExec =>
    case _: InputAdapter =>
    case p: AdaptiveSparkPlanExec =>
      // Number the wrapped plan first so that the wrapper gets the higher id.
      currentOperationID = generateOperatorIDs(p.executedPlan, currentOperationID)
      setOpId(p)
    case p: QueryStageExec =>
      currentOperationID = generateOperatorIDs(p.plan, currentOperationID)
      setOpId(p)
    case other: QueryPlan[_] =>
      setOpId(other)
      // Keep the fold result: without this assignment the ids consumed by the inner
      // children are forgotten and the next visited operator would reuse one of them.
      currentOperationID = other.innerChildren.foldLeft(currentOperationID) {
        (curId, child) => generateOperatorIDs(child, curId)
      }
  }
  currentOperationID
}
172150
/**
 * Walks the supplied plan bottom-up and appends every operator that carries an operator id
 * tag to `operators`. WholeStageCodegenExec and InputAdapter nodes are never collected.
 * For AdaptiveSparkPlanExec and QueryStageExec the wrapped plan is collected before the
 * wrapper itself; for any other operator its inner children are collected after it.
 *
 * @param plan Input query plan to process
 * @param operators An output parameter that receives the collected operators.
 */
private def collectOperatorsWithID(
    plan: QueryPlan[_],
    operators: ArrayBuffer[QueryPlan[_]]): Unit = {
  // Appends `node` only when an operator id tag is present on it.
  def appendIfTagged(node: QueryPlan[_]): Unit = {
    if (node.getTagValue(QueryPlan.OP_ID_TAG).isDefined) {
      operators += node
    }
  }

  plan match {
    // Subqueries are not printed as part of the main query block, so nothing is collected.
    case _: BaseSubqueryExec =>
    case _ =>
      plan.foreachUp {
        case _: WholeStageCodegenExec | _: InputAdapter =>
        case adaptive: AdaptiveSparkPlanExec =>
          collectOperatorsWithID(adaptive.executedPlan, operators)
          appendIfTagged(adaptive)
        case stage: QueryStageExec =>
          collectOperatorsWithID(stage.plan, operators)
          appendIfTagged(stage)
        case node: QueryPlan[_] =>
          appendIfTagged(node)
          for (inner <- node.innerChildren) {
            collectOperatorsWithID(inner, operators)
          }
      }
  }
}
186+
173187 /**
174188 * Traverses the supplied input plan in a top-down fashion and records the
175189 * whole stage code gen id in the plan via setting a tag.
0 commit comments