diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index aa8540fb4455..63299a39a9f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -185,9 +185,9 @@ abstract class Optimizer(catalogManager: CatalogManager)
RemoveLiteralFromGroupExpressions,
RemoveRepetitionFromGroupExpressions) :: Nil ++
operatorOptimizationBatch) :+
- // This batch rewrites data source plans and should be run after the operator
- // optimization batch and before any batches that depend on stats.
- Batch("Data Source Rewrite Rules", Once, dataSourceRewriteRules: _*) :+
+ // This batch rewrites plans after the operator optimization and
+ // before any batches that depend on stats.
+ Batch("Pre CBO Rules", Once, preCBORules: _*) :+
// This batch pushes filters and projections into scan nodes. Before this batch, the logical
// plan may contain nodes that do not report stats. Anything that uses stats must run after
// this batch.
@@ -293,10 +293,10 @@ abstract class Optimizer(catalogManager: CatalogManager)
def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil
/**
- * Override to provide additional rules for rewriting data source plans. Such rules will be
- * applied after operator optimization rules and before any rules that depend on stats.
+ * Override to provide additional rules for rewriting plans after operator optimization rules and
+ * before any cost-based optimization rules that depend on stats.
*/
- def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil
+ def preCBORules: Seq[Rule[LogicalPlan]] = Nil
/**
* Returns (defaultBatches - (excludedRules - nonExcludableRules)), the rule batches that
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
index d5d969032a5e..074906a971b1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
@@ -40,7 +40,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan}
*
Analyzer Rules.
* Check Analysis Rules.
* Optimizer Rules.
- * Data Source Rewrite Rules.
+ * Pre CBO Rules.
* Planning Strategies.
* Customized Parser.
* (External) Catalog listeners.
@@ -200,19 +200,19 @@ class SparkSessionExtensions {
optimizerRules += builder
}
- private[this] val dataSourceRewriteRules = mutable.Buffer.empty[RuleBuilder]
+ private[this] val preCBORules = mutable.Buffer.empty[RuleBuilder]
- private[sql] def buildDataSourceRewriteRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
- dataSourceRewriteRules.map(_.apply(session)).toSeq
+ private[sql] def buildPreCBORules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
+ preCBORules.map(_.apply(session)).toSeq
}
/**
- * Inject an optimizer `Rule` builder that rewrites data source plans into the [[SparkSession]].
- * The injected rules will be executed after the operator optimization batch and before rules
- * that depend on stats.
+ * Inject an optimizer `Rule` builder that rewrites logical plans into the [[SparkSession]].
+ * The injected rules will be executed once after the operator optimization batch and
+ * before any cost-based optimization rules that depend on stats.
*/
- def injectDataSourceRewriteRule(builder: RuleBuilder): Unit = {
- dataSourceRewriteRules += builder
+ def injectPreCBORule(builder: RuleBuilder): Unit = {
+ preCBORules += builder
}
private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index 8fb351a2a3b2..6b84f0e636c1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -231,8 +231,8 @@ abstract class BaseSessionStateBuilder(
override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] =
super.earlyScanPushDownRules ++ customEarlyScanPushDownRules
- override def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] =
- super.dataSourceRewriteRules ++ customDataSourceRewriteRules
+ override def preCBORules: Seq[Rule[LogicalPlan]] =
+ super.preCBORules ++ customPreCBORules
override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] =
super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules
@@ -258,13 +258,13 @@ abstract class BaseSessionStateBuilder(
protected def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil
/**
- * Custom rules for rewriting data source plans to add to the Optimizer. Prefer overriding
- * this instead of creating your own Optimizer.
+ * Custom rules for rewriting plans after operator optimization and before CBO.
+ * Prefer overriding this instead of creating your own Optimizer.
*
* Note that this may NOT depend on the `optimizer` function.
*/
- protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = {
- extensions.buildDataSourceRewriteRules(session)
+ protected def customPreCBORules: Seq[Rule[LogicalPlan]] = {
+ extensions.buildPreCBORules(session)
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index 7c19f98b762f..35d251383561 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -88,9 +88,9 @@ class SparkSessionExtensionSuite extends SparkFunSuite {
}
}
- test("SPARK-33621: inject data source rewrite rule") {
- withSession(Seq(_.injectDataSourceRewriteRule(MyRule))) { session =>
- assert(session.sessionState.optimizer.dataSourceRewriteRules.contains(MyRule(session)))
+ test("SPARK-33621: inject a pre CBO rule") {
+ withSession(Seq(_.injectPreCBORule(MyRule))) { session =>
+ assert(session.sessionState.optimizer.preCBORules.contains(MyRule(session)))
}
}