fixup

spark fixup fixup fixup
apache · Mar 6, 2024 · 3b514b1 · 3b514b1
1 parent e9fdd6e
commit 3b514b1
Show file tree

Hide file tree

Showing 17 changed files with 577 additions and 551 deletions.
diff --git a/...lickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/...lickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -21,8 +21,9 @@ import io.glutenproject.backendsapi.{BackendsApiManager, SparkPlanExecApi}
 import io.glutenproject.execution._
 import io.glutenproject.expression._
 import io.glutenproject.expression.ConverterUtils.FunctionConfig
-import io.glutenproject.extension.{FallbackBroadcastHashJoin, FallbackBroadcastHashJoinPrepQueryStage, TransformPreOverrides}
+import io.glutenproject.extension.{FallbackBroadcastHashJoin, FallbackBroadcastHashJoinPrepQueryStage}
 import io.glutenproject.extension.columnar.AddTransformHintRule
+import io.glutenproject.extension.columnar.MiscColumnarRules.TransformPreOverrides
 import io.glutenproject.substrait.expression.{ExpressionBuilder, ExpressionNode, WindowFunctionNode}
 import io.glutenproject.utils.CHJoinValidateUtil
 import io.glutenproject.vectorized.CHColumnarBatchSerializer
@@ -54,9 +55,7 @@ import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleEx
 import org.apache.spark.sql.execution.joins.{BuildSideRelation, ClickHouseBuildSideRelation, HashedRelationBroadcastMode}
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.execution.utils.{CHExecUtil, PushDownUtil}
-import org.apache.spark.sql.extension.ClickHouseAnalysis
-import org.apache.spark.sql.extension.CommonSubexpressionEliminateRule
-import org.apache.spark.sql.extension.RewriteDateTimestampComparisonRule
+import org.apache.spark.sql.extension.{ClickHouseAnalysis, CommonSubexpressionEliminateRule, RewriteDateTimestampComparisonRule}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.vectorized.ColumnarBatch
 

diff --git a/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala
@@ -19,7 +19,7 @@ package io.glutenproject.execution
 import io.glutenproject.backendsapi.BackendsApiManager
 import io.glutenproject.expression._
 import io.glutenproject.expression.ConverterUtils.FunctionConfig
-import io.glutenproject.extension.RewriteTypedImperativeAggregate
+import io.glutenproject.extension.columnar.RewriteTypedImperativeAggregate
 import io.glutenproject.substrait.`type`.{TypeBuilder, TypeNode}
 import io.glutenproject.substrait.{AggregationParams, SubstraitContext}
 import io.glutenproject.substrait.expression.{AggregateFunctionNode, ExpressionBuilder, ExpressionNode, ScalarFunctionNode}

diff --git a/gluten-core/src/main/scala/io/glutenproject/extension/ColumnarOverrides.scala b/gluten-core/src/main/scala/io/glutenproject/extension/ColumnarOverrides.scala
diff --git a/...sion/CollapseProjectExecTransformer.scala → ...mnar/CollapseProjectExecTransformer.scala b/...sion/CollapseProjectExecTransformer.scala → ...mnar/CollapseProjectExecTransformer.scala
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.glutenproject.extension
+package io.glutenproject.extension.columnar
 
 import io.glutenproject.GlutenConfig
 import io.glutenproject.execution.ProjectExecTransformer

diff --git a/...oject/extension/ColumnarTransitions.scala → ...ension/columnar/ColumnarTransitions.scala b/...oject/extension/ColumnarTransitions.scala → ...ension/columnar/ColumnarTransitions.scala
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.glutenproject.extension
+package io.glutenproject.extension.columnar
 
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{ApplyColumnarRulesAndInsertTransitions, ColumnarToRowExec, RowToColumnarExec, SparkPlan}

diff --git a/...tension/EnsureLocalSortRequirements.scala → ...olumnar/EnsureLocalSortRequirements.scala b/...tension/EnsureLocalSortRequirements.scala → ...olumnar/EnsureLocalSortRequirements.scala
@@ -14,11 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.glutenproject.extension
+package io.glutenproject.extension.columnar
 
 import io.glutenproject.GlutenConfig
 import io.glutenproject.execution.SortExecTransformer
-import io.glutenproject.extension.columnar.TransformHints
 
 import org.apache.spark.sql.catalyst.expressions.SortOrder
 import org.apache.spark.sql.catalyst.rules.Rule

diff --git a/...ject/extension/ExpandFallbackPolicy.scala → ...nsion/columnar/ExpandFallbackPolicy.scala b/...ject/extension/ExpandFallbackPolicy.scala → ...nsion/columnar/ExpandFallbackPolicy.scala
@@ -14,11 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.glutenproject.extension
+package io.glutenproject.extension.columnar
 
 import io.glutenproject.GlutenConfig
 import io.glutenproject.execution.BroadcastHashJoinExecTransformer
-import io.glutenproject.extension.columnar.{TRANSFORM_UNSUPPORTED, TransformHints}
+import io.glutenproject.extension.GlutenPlan
+import io.glutenproject.extension.columnar.MiscColumnarRules.TransformPostOverrides
 import io.glutenproject.utils.PlanUtil
 
 import org.apache.spark.rdd.RDD
@@ -33,36 +34,36 @@ import org.apache.spark.sql.execution.exchange.Exchange
 
 // spotless:off
 /**
- * Note, this rule should only fallback to row-based plan if there is no harm.
- * The follow case should be handled carefully
+ * Note, this rule should only fall back to a row-based plan if there is no harm.
+ * The following cases should be handled carefully:
  *
- * 1. A BHJ and the previous broadcast exchange is columnar
+ * 1. A BHJ and the previous broadcast exchange is columnar.
  *    We should still make the BHJ columnar, otherwise it will fail if
- *    the vanilla BHJ accept a columnar broadcast exchange, e.g.,
+ *    the vanilla BHJ accepts a columnar broadcast exchange, e.g.,
  *
  *    Scan                Scan
  *      \                  |
  *        \     Columnar Broadcast Exchange
  *          \       /
  *             BHJ
  *              |
  *       VeloxColumnarToRow
  *              |
  *           Project (unsupport columnar)
  *
- * 2. The previous shuffle exchange stage is a columnar shuffle exchange
+ * 2. The previous shuffle exchange stage is a columnar shuffle exchange.
  *    We should use VeloxColumnarToRow rather than vanilla Spark ColumnarToRowExec, e.g.,
  *
  *             Scan
  *              |
  *    Columnar Shuffle Exchange
  *              |
  *       VeloxColumnarToRow
  *              |
  *           Project (unsupport columnar)
  *
- * @param isAdaptiveContext If is inside AQE
- * @param originalPlan The vanilla SparkPlan without apply gluten transform rules
+ * @param isAdaptiveContext Whether the rule is applied inside AQE
+ * @param originalPlan The vanilla SparkPlan before Gluten transform rules are applied
  */
 // spotless:on
 case class ExpandFallbackPolicy(isAdaptiveContext: Boolean, originalPlan: SparkPlan)
@@ -105,39 +94,37 @@ case class ExpandFallbackPolicy(isAdaptiveContext: Boolean, originalPlan: SparkP
     transitionCost
   }
 
+  // spotless:off
   /**
    * When making a stage fall back, it's possible that we need a ColumnarToRow to adapt to last
    * stage's columnar output. So we need to evaluate the cost, i.e., the number of required
    * ColumnarToRow between entirely fallback stage and last stage(s). Thus, we can avoid possible
    * performance degradation caused by fallback policy.
    *
-   * spotless:off
-   *
    * Spark plan before applying fallback policy:
    *
    *        ColumnarExchange
    *  ----------- | --------------- last stage
    *    HashAggregateTransformer
    *              |
    *        ColumnarToRow
    *              |
    *           Project
    *
    * To illustrate the effect if cost is not taken into account, here is spark plan
    * after applying whole stage fallback policy (threshold = 1):
    *
    *        ColumnarExchange
    *  -----------  | --------------- last stage
    *         ColumnarToRow
    *               |
    *         HashAggregate
    *               |
    *            Project
    *
    *  So by considering the cost, the fallback policy will not be applied.
-   *
-   * spotless:on
    */
+  // spotless:on
   private def countStageFallbackTransitionCost(plan: SparkPlan): Int = {
     var stageFallbackTransitionCost = 0
 

diff --git a/...tension/MergeTwoPhasesHashAggregate.scala → ...olumnar/MergeTwoPhasesHashAggregate.scala b/...tension/MergeTwoPhasesHashAggregate.scala → ...olumnar/MergeTwoPhasesHashAggregate.scala
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.glutenproject.extension
+package io.glutenproject.extension.columnar
 
 import io.glutenproject.GlutenConfig
 import io.glutenproject.backendsapi.BackendsApiManager