[SPARK-14269][SCHEDULER] Eliminate unnecessary submitStage() call. #12060
Changes from all commits
```diff
@@ -726,7 +726,6 @@ class DAGScheduler(
       reason = "as part of cancellation of all jobs"))
     activeJobs.clear() // These should already be empty by this point,
     jobIdToActiveJob.clear() // but just in case we lost track of some jobs...
-    submitWaitingStages()
   }

   /**
```
```diff
@@ -752,23 +751,21 @@ class DAGScheduler(
         submitStage(stage)
       }
     }
-    submitWaitingStages()
   }

   /**
-   * Check for waiting stages which are now eligible for resubmission.
-   * Ordinarily run on every iteration of the event loop.
+   * Submits stages that depend on the given parent stage. Called when the parent stage completes
+   * successfully.
    */
-  private def submitWaitingStages() {
-    // TODO: We might want to run this less often, when we are sure that something has become
-    // runnable that wasn't before.
-    logTrace("Checking for newly runnable parent stages")
+  private def submitWaitingChildStages(parent: Stage) {
+    logTrace(s"Checking if any dependencies of $parent are now runnable")
     logTrace("running: " + runningStages)
     logTrace("waiting: " + waitingStages)
     logTrace("failed: " + failedStages)
-    val waitingStagesCopy = waitingStages.toArray
-    waitingStages.clear()
-    for (stage <- waitingStagesCopy.sortBy(_.firstJobId)) {
+    val childStages = waitingStages.filter(_.parents.contains(parent)).toArray
+    waitingStages --= childStages
+    for (stage <- childStages.sortBy(_.firstJobId)) {
       submitStage(stage)
     }
   }
```
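To make the behavioral change concrete, here is a minimal standalone sketch of the new selection logic. The `ToyStage` model and driver below are hypothetical stand-ins, not Spark's `Stage` or the real `DAGScheduler`: the point is that only waiting stages whose parents include the just-finished stage leave the waiting set, instead of draining and resubmitting every waiting stage on each event.

```scala
import scala.collection.mutable

// Hypothetical, simplified stand-in for org.apache.spark.scheduler.Stage.
case class ToyStage(id: Int, firstJobId: Int, parents: Seq[ToyStage])

object SubmitWaitingChildStagesSketch {
  val waitingStages = mutable.HashSet[ToyStage]()

  def submitStage(stage: ToyStage): Unit = println(s"submitting stage ${stage.id}")

  // Mirrors the shape of the new submitWaitingChildStages(parent): only children of the
  // completed parent are pulled out of the waiting set; unrelated waiting stages stay put.
  def submitWaitingChildStages(parent: ToyStage): Unit = {
    val childStages = waitingStages.filter(_.parents.contains(parent)).toArray
    waitingStages --= childStages
    for (stage <- childStages.sortBy(_.firstJobId)) {
      submitStage(stage)
    }
  }

  def main(args: Array[String]): Unit = {
    val map1 = ToyStage(0, firstJobId = 0, parents = Nil)
    val map2 = ToyStage(1, firstJobId = 1, parents = Nil)
    val reduce1 = ToyStage(2, firstJobId = 0, parents = Seq(map1))
    val reduce2 = ToyStage(3, firstJobId = 1, parents = Seq(map2))
    waitingStages ++= Seq(reduce1, reduce2)

    submitWaitingChildStages(map1)   // submits only reduce1
    println(waitingStages.map(_.id)) // reduce2 (id 3) is still waiting
  }
}
```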
```diff
@@ -793,23 +790,20 @@ class DAGScheduler(
     }
     val jobIds = activeInGroup.map(_.jobId)
     jobIds.foreach(handleJobCancellation(_, "part of cancelled job group %s".format(groupId)))
-    submitWaitingStages()
   }

   private[scheduler] def handleBeginEvent(task: Task[_], taskInfo: TaskInfo) {
     // Note that there is a chance that this task is launched after the stage is cancelled.
     // In that case, we wouldn't have the stage anymore in stageIdToStage.
     val stageAttemptId = stageIdToStage.get(task.stageId).map(_.latestInfo.attemptId).getOrElse(-1)
     listenerBus.post(SparkListenerTaskStart(task.stageId, stageAttemptId, taskInfo))
-    submitWaitingStages()
   }

   private[scheduler] def handleTaskSetFailed(
       taskSet: TaskSet,
       reason: String,
       exception: Option[Throwable]): Unit = {
     stageIdToStage.get(taskSet.stageId).foreach { abortStage(_, reason, exception) }
-    submitWaitingStages()
   }

   private[scheduler] def cleanUpAfterSchedulerStop() {
```
```diff
@@ -832,7 +826,6 @@ class DAGScheduler(

   private[scheduler] def handleGetTaskResult(taskInfo: TaskInfo) {
     listenerBus.post(SparkListenerTaskGettingResult(taskInfo))
-    submitWaitingStages()
   }

   private[scheduler] def handleJobSubmitted(jobId: Int,
```
```diff
@@ -871,8 +864,6 @@ class DAGScheduler(
     listenerBus.post(
       SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties))
     submitStage(finalStage)
-
-    submitWaitingStages()
   }

   private[scheduler] def handleMapStageSubmitted(jobId: Int,
```
```diff
@@ -916,8 +907,6 @@ class DAGScheduler(
     if (finalStage.isAvailable) {
       markMapStageJobAsFinished(job, mapOutputTracker.getStatistics(dependency))
     }
-
-    submitWaitingStages()
   }

   /** Submits stage, but first recursively submits any missing parents. */
```
```diff
@@ -1073,6 +1062,8 @@ class DAGScheduler(
           s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})"
       }
       logDebug(debugString)
+
+      submitWaitingChildStages(stage)
     }
   }
```
```diff
@@ -1238,9 +1229,8 @@ class DAGScheduler(
                   markMapStageJobAsFinished(job, stats)
                 }
               }
+              submitWaitingChildStages(shuffleStage)
             }
-
-            // Note: newly runnable stages will be submitted below when we submit waiting stages
           }
       }
```

Inline review comments:

Member: Do we need to call this

Member (Author): I don't think we need to.
```diff
@@ -1315,7 +1305,6 @@ class DAGScheduler(
       // Unrecognized failure - also do nothing. If the task fails repeatedly, the TaskScheduler
       // will abort the job.
     }
-    submitWaitingStages()
   }

   /**
```
```diff
@@ -1357,7 +1346,6 @@ class DAGScheduler(
       logDebug("Additional executor lost message for " + execId +
         "(epoch " + currentEpoch + ")")
     }
-    submitWaitingStages()
   }

   private[scheduler] def handleExecutorAdded(execId: String, host: String) {
```

Inline review comments:

Contributor: Is it necessary to submit some newly-waiting stages here (e.g., if shuffle output was lost for a map stage, so now that map stage needs to be re-run)?

Contributor: This appears to be a non-issue, because we handle lost shuffle output separately, when we get a FetchFailure from a task that tries to fetch the output.
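The second comment's point can be sketched as follows. This is a deliberately simplified, hypothetical model (the MapStage, ReduceStage, FetchFailed, and handleTaskCompletion names below are toy stand-ins, not Spark's classes): lost shuffle output is not acted on when the executor is lost, but only when a task later reports a fetch failure, at which point both the fetching stage and the map stage that lost output are queued for resubmission.

```scala
import scala.collection.mutable

// Hypothetical toy types; not the real Spark scheduler classes.
case class MapStage(id: Int)
case class ReduceStage(id: Int, parent: MapStage)

sealed trait TaskEndReason
case object Success extends TaskEndReason
case class FetchFailed(lostMapStage: MapStage) extends TaskEndReason

object FetchFailureSketch {
  val failedStages = mutable.HashSet[Int]()

  // The idea from the review thread: nothing is eagerly resubmitted on executor loss.
  // Only when a reduce task actually fails to fetch map output do both the fetching
  // stage and the map stage that lost output get queued for resubmission.
  def handleTaskCompletion(stage: ReduceStage, reason: TaskEndReason): Unit = reason match {
    case FetchFailed(lostMapStage) =>
      failedStages += stage.id
      failedStages += lostMapStage.id
      println(s"will resubmit map stage ${lostMapStage.id} and reduce stage ${stage.id}")
    case Success =>
      println(s"stage ${stage.id} task succeeded")
  }

  def main(args: Array[String]): Unit = {
    val map = MapStage(0)
    val reduce = ReduceStage(1, map)
    handleTaskCompletion(reduce, FetchFailed(map))
    println(failedStages) // Set(0, 1)
  }
}
```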
```diff
@@ -1366,7 +1354,6 @@ class DAGScheduler(
       logInfo("Host added was in lost list earlier: " + host)
       failedEpoch -= execId
     }
-    submitWaitingStages()
   }

   private[scheduler] def handleStageCancellation(stageId: Int) {
```
```diff
@@ -1379,7 +1366,6 @@ class DAGScheduler(
       case None =>
         logInfo("No active jobs to kill for Stage " + stageId)
     }
-    submitWaitingStages()
   }

   private[scheduler] def handleJobCancellation(jobId: Int, reason: String = "") {
```
```diff
@@ -1389,7 +1375,6 @@ class DAGScheduler(
       failJobAndIndependentStages(
         jobIdToActiveJob(jobId), "Job %d cancelled %s".format(jobId, reason))
     }
-    submitWaitingStages()
   }

   /**
```
Review discussion:

Comment: It seems submitWaitingChildStages is called to submit child stages when the given parent stage is available. From this observation, do we have to re-check missing parents inside submitStage?

Reply: Yes, and the re-check is done in submitStage(). If there are some missing parent stages, the child will go to waitingStages again.

Reply: Ah, I see.
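A minimal sketch of the re-check being described, using a hypothetical ToyStage model rather than Spark's Stage: submitStage() itself looks for missing parents, submits them first, and puts the child back into waitingStages until a later submitWaitingChildStages(parent) call picks it up again.

```scala
import scala.collection.mutable

// Toy model only (hypothetical ToyStage, not Spark's Stage); illustrates the re-check
// described in the reply above.
case class ToyStage(id: Int, parents: Seq[ToyStage])

object SubmitStageRecheckSketch {
  val waitingStages = mutable.HashSet[ToyStage]()
  val availableStages = mutable.HashSet[Int]() // stages whose output already exists

  def missingParents(stage: ToyStage): Seq[ToyStage] =
    stage.parents.filterNot(p => availableStages.contains(p.id))

  def submitStage(stage: ToyStage): Unit = {
    val missing = missingParents(stage)
    if (missing.isEmpty) {
      println(s"running stage ${stage.id}")
      availableStages += stage.id
    } else {
      missing.foreach(submitStage) // submit parents first
      waitingStages += stage       // the child goes back into waitingStages
      // Later, when a parent completes, submitWaitingChildStages(parent) would
      // pick the child up and call submitStage on it again.
    }
  }

  def main(args: Array[String]): Unit = {
    val parent = ToyStage(0, Nil)
    val child = ToyStage(1, Seq(parent))
    submitStage(child)
    println(waitingStages.map(_.id)) // child (id 1) is waiting; parent was submitted
  }
}
```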