-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-28346][SQL] clone the query plan between analyzer, optimizer and planner #25111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker | |
| import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} | ||
| import org.apache.spark.sql.catalyst.rules.Rule | ||
| import org.apache.spark.sql.catalyst.util.StringUtils.{PlanStringConcat, StringConcat} | ||
| import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat | ||
| import org.apache.spark.sql.catalyst.util.truncatedString | ||
| import org.apache.spark.sql.execution.adaptive.InsertAdaptiveSparkPlan | ||
| import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} | ||
|
|
@@ -60,36 +60,38 @@ class QueryExecution( | |
|
|
||
| lazy val analyzed: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.ANALYSIS) { | ||
| SparkSession.setActiveSession(sparkSession) | ||
| // We can't clone `logical` here, which will reset the `_analyzed` flag. | ||
| sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) | ||
| } | ||
|
|
||
| lazy val withCachedData: LogicalPlan = { | ||
| assertAnalyzed() | ||
| assertSupported() | ||
| sparkSession.sharedState.cacheManager.useCachedData(analyzed) | ||
| // clone the plan to avoid sharing the plan instance between different stages like analyzing, | ||
| // optimizing and planning. | ||
| sparkSession.sharedState.cacheManager.useCachedData(analyzed.clone()) | ||
| } | ||
|
|
||
| lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) { | ||
| sparkSession.sessionState.optimizer.executeAndTrack(withCachedData, tracker) | ||
| // clone the plan to avoid sharing the plan instance between different stages like analyzing, | ||
| // optimizing and planning. | ||
| sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
If we decide to clone the plan after each stage, will any test fail if we do not clone it?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test added |
||
| } | ||
|
|
||
| lazy val sparkPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { | ||
| SparkSession.setActiveSession(sparkSession) | ||
| // Runtime re-optimization requires a unique instance of every node in the logical plan. | ||
| val logicalPlan = if (sparkSession.sessionState.conf.adaptiveExecutionEnabled) { | ||
| optimizedPlan.clone() | ||
| } else { | ||
| optimizedPlan | ||
| } | ||
| // TODO: We use next(), i.e. take the first plan returned by the planner, here for now, | ||
| // but we will implement to choose the best plan. | ||
| planner.plan(ReturnAnswer(logicalPlan)).next() | ||
| // Clone the logical plan here, in case the planner rules change the states of the logical plan. | ||
| planner.plan(ReturnAnswer(optimizedPlan.clone())).next() | ||
| } | ||
|
|
||
| // executedPlan should not be used to initialize any SparkPlan. It should be | ||
| // only used for execution. | ||
| lazy val executedPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { | ||
| prepareForExecution(sparkPlan) | ||
| // clone the plan to avoid sharing the plan instance between different stages like analyzing, | ||
| // optimizing and planning. | ||
| prepareForExecution(sparkPlan.clone()) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,4 +52,10 @@ case class SaveIntoDataSourceCommand( | |
| val redacted = SQLConf.get.redactOptions(options) | ||
| s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}" | ||
| } | ||
|
|
||
| // Override `clone` since the default implementation will turn `CaseInsensitiveMap` to a normal | ||
| // map. | ||
| override def clone(): LogicalPlan = { | ||
| SaveIntoDataSourceCommand(query.clone(), dataSource, options, mode) | ||
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe not necessary, but should we clone `logical` too before sending it to the analyzer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yea I think we should