[SPARK-4134] [SPARK-7835] [WIP] Dynamic allocation: Tone down kill error messages #6310
Changes from all commits
HeartbeatReceiver.scala
@@ -24,8 +24,8 @@ import scala.collection.mutable
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.rpc.{ThreadSafeRpcEndpoint, RpcEnv, RpcCallContext}
 import org.apache.spark.storage.BlockManagerId
-import org.apache.spark.scheduler.{SlaveLost, TaskScheduler}
-import org.apache.spark.util.{ThreadUtils, Utils}
+import org.apache.spark.scheduler._
+import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
 
 /**
  * A heartbeat from executors to the driver. This is a shared message used by several internal
@@ -43,15 +43,25 @@ private[spark] case class Heartbeat(
  */
 private[spark] case object TaskSchedulerIsSet
 
 private[spark] case object ExpireDeadHosts
 
+private case class ExecutorRegistered(executorId: String)
+
+private case class ExecutorRemoved(executorId: String)
+
 private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean)
 
 /**
  * Lives in the driver to receive heartbeats from executors..
  */
-private[spark] class HeartbeatReceiver(sc: SparkContext)
-  extends ThreadSafeRpcEndpoint with Logging {
+private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock)
+  extends ThreadSafeRpcEndpoint with SparkListener with Logging {
+
+  def this(sc: SparkContext) {
+    this(sc, new SystemClock)
+  }
+
+  sc.addSparkListener(this)
 
   override val rpcEnv: RpcEnv = sc.env.rpcEnv
@@ -86,30 +96,45 @@ private[spark] class HeartbeatReceiver(sc: SparkContext)
   override def onStart(): Unit = {
     timeoutCheckingTask = eventLoopThread.scheduleAtFixedRate(new Runnable {
       override def run(): Unit = Utils.tryLogNonFatalError {
-        Option(self).foreach(_.send(ExpireDeadHosts))
+        Option(self).foreach(_.ask[Boolean](ExpireDeadHosts))
       }
     }, 0, checkTimeoutIntervalMs, TimeUnit.MILLISECONDS)
   }
 
   override def receive: PartialFunction[Any, Unit] = {
-    case ExpireDeadHosts =>
-      expireDeadHosts()
-    case TaskSchedulerIsSet =>
-      scheduler = sc.taskScheduler
+    case ExecutorRegistered(executorId) =>
+      executorLastSeen(executorId) = clock.getTimeMillis()
+    case ExecutorRemoved(executorId) =>
+      executorLastSeen.remove(executorId)
   }
 
   override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
+    case TaskSchedulerIsSet =>
+      scheduler = sc.taskScheduler
+      context.reply(true)
+    case ExpireDeadHosts =>
+      expireDeadHosts()
+      context.reply(true)
Contributor (author) comment: I moved these here to increase test determinism.
     case heartbeat @ Heartbeat(executorId, taskMetrics, blockManagerId) =>
       if (scheduler != null) {
-        executorLastSeen(executorId) = System.currentTimeMillis()
-        eventLoopThread.submit(new Runnable {
-          override def run(): Unit = Utils.tryLogNonFatalError {
-            val unknownExecutor = !scheduler.executorHeartbeatReceived(
-              executorId, taskMetrics, blockManagerId)
-            val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
-            context.reply(response)
-          }
-        })
+        if (executorLastSeen.contains(executorId)) {
+          executorLastSeen(executorId) = clock.getTimeMillis()
+          eventLoopThread.submit(new Runnable {
+            override def run(): Unit = Utils.tryLogNonFatalError {
+              val unknownExecutor = !scheduler.executorHeartbeatReceived(
+                executorId, taskMetrics, blockManagerId)
+              val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
+              context.reply(response)
+            }
+          })
+        } else {
+          // This may happen if we get an executor's in-flight heartbeat immediately
+          // after we just removed it. It's not really an error condition so we should
+          // not log warning here. Otherwise there may be a lot of noise especially if
+          // we explicitly remove executors (SPARK-4134).
+          logDebug(s"Received heartbeat from unknown executor $executorId")
+          context.reply(HeartbeatResponse(reregisterBlockManager = true))
+        }
       } else {
         // Because Executor will sleep several seconds before sending the first "Heartbeat", this
         // case rarely happens. However, if it really happens, log it and ask the executor to
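The author's note above about test determinism hinges on the difference between a fire-and-forget send handled in receive and an ask handled in receiveAndReply: once TaskSchedulerIsSet and ExpireDeadHosts produce a reply, a test can block until the message has actually been processed. Below is a minimal, self-contained sketch of that idea using plain Scala futures; it is not Spark's RPC API, and all names are illustrative.

```scala
import scala.concurrent.{Await, Future, Promise}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

object AskVsSendSketch extends App {
  @volatile var expired = false

  // Fire-and-forget: the caller gets nothing to wait on, so a test cannot know
  // when (or whether) the handler has run.
  def send(msg: Any): Unit = Future {
    if (msg == "ExpireDeadHosts") expired = true
  }

  // Ask: the handler replies when it is done, so a test can block on the future
  // and only then make assertions.
  def ask(msg: Any): Future[Boolean] = {
    val reply = Promise[Boolean]()
    Future {
      if (msg == "ExpireDeadHosts") expired = true
      reply.success(true)
    }
    reply.future
  }

  Await.result(ask("ExpireDeadHosts"), 5.seconds)
  assert(expired) // guaranteed: the handler completed before the reply arrived
}
```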
@@ -119,9 +144,30 @@ private[spark] class HeartbeatReceiver(sc: SparkContext)
     }
   }
 
+  /**
+   * If the heartbeat receiver is not stopped, notify it of executor registrations.
+   */
+  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
+    Option(self).foreach(_.send(ExecutorRegistered(executorAdded.executorId)))
+  }
+
+  /**
+   * If the heartbeat receiver is not stopped, notify it of executor removals so it doesn't
+   * log superfluous errors.
+   *
+   * Note that we must do this after the executor is actually removed to guard against the
+   * following race condition: if we remove an executor's metadata from our data structure
+   * prematurely, we may get an in-flight heartbeat from the executor before the executor is
+   * actually removed, in which case we will still mark the executor as a dead host later
+   * and expire it with loud error messages.
+   */
+  override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
+    Option(self).foreach(_.send(ExecutorRemoved(executorRemoved.executorId)))
+  }
+
   private def expireDeadHosts(): Unit = {
     logTrace("Checking for hosts with no recent heartbeats in HeartbeatReceiver.")
-    val now = System.currentTimeMillis()
+    val now = clock.getTimeMillis()
     for ((executorId, lastSeenMs) <- executorLastSeen) {
       if (now - lastSeenMs > executorTimeoutMs) {
         logWarning(s"Removing executor $executorId with no recent heartbeats: " +
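Threading a Clock through HeartbeatReceiver is what makes the timeout path above deterministically testable: instead of sleeping past executorTimeoutMs, a test can advance a manual clock. The sketch below is self-contained; Clock and ManualClock are simplified stand-ins for Spark's utility clocks, and ExpirySketch is a stripped-down analogue of expireDeadHosts, not the real class.

```scala
import scala.collection.mutable

// Simplified stand-ins for Spark's clock utilities.
trait Clock { def getTimeMillis(): Long }
class ManualClock(var now: Long = 0L) extends Clock {
  def advance(ms: Long): Unit = { now += ms }
  def getTimeMillis(): Long = now
}

// A stripped-down analogue of the heartbeat bookkeeping: record last-seen times
// and expire entries older than the timeout, using whatever clock was injected.
class ExpirySketch(clock: Clock, timeoutMs: Long) {
  val lastSeen = mutable.Map.empty[String, Long]
  def heartbeat(id: String): Unit = lastSeen(id) = clock.getTimeMillis()
  def expireDeadHosts(): Seq[String] = {
    val now = clock.getTimeMillis()
    val dead = lastSeen.collect { case (id, t) if now - t > timeoutMs => id }.toSeq
    dead.foreach(lastSeen.remove)
    dead
  }
}

object ExpiryDemo extends App {
  val clock = new ManualClock()
  val recv = new ExpirySketch(clock, timeoutMs = 1000L)
  recv.heartbeat("executor-1")
  clock.advance(500L)
  assert(recv.expireDeadHosts().isEmpty)                // within timeout, nothing expires
  clock.advance(1000L)
  assert(recv.expireDeadHosts() == Seq("executor-1"))   // deterministic expiry, no sleeps
}
```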
CoarseGrainedSchedulerBackend.scala
@@ -154,8 +154,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         }
         context.reply(true)
 
-      case RemoveExecutor(executorId, reason) =>
-        removeExecutor(executorId, reason)
+      case RemoveExecutor(executorId, reason, isError) =>
+        removeExecutor(executorId, reason, isError)
         context.reply(true)
 
       case RetrieveSparkProps =>
@@ -210,7 +210,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
     }
 
     // Remove a disconnected slave from the cluster
-    def removeExecutor(executorId: String, reason: String): Unit = {
+    def removeExecutor(executorId: String, reason: String, isError: Boolean = true): Unit = {
       executorDataMap.get(executorId) match {
         case Some(executorInfo) =>
           // This must be synchronized because variables mutated
@@ -222,7 +222,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         }
         totalCoreCount.addAndGet(-executorInfo.totalCores)
         totalRegisteredExecutors.addAndGet(-1)
-        scheduler.executorLost(executorId, SlaveLost(reason))
+        scheduler.executorLost(executorId, new ExecutorLossReason(reason, isError))
         listenerBus.post(
           SparkListenerExecutorRemoved(System.currentTimeMillis(), executorId, reason))
       case None => logError(s"Asked to remove non-existent executor $executorId")
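The isError flag added to ExecutorLossReason and RemoveExecutor is what carries the "this kill was intentional" signal from killExecutors down to the scheduler, presumably so the resulting log messages can be toned down as the PR title says. A rough, self-contained illustration of the idea follows; LossReason and the log lines are invented for this sketch and are not Spark's classes.

```scala
// Illustrative only: a loss reason that remembers whether the loss was expected.
case class LossReason(message: String, isError: Boolean = true)

object LossLoggingSketch extends App {
  // The consumer of the reason can pick the log severity from the flag:
  // unexpected losses stay loud, deliberate kills are logged quietly.
  def executorLost(executorId: String, reason: LossReason): Unit = {
    if (reason.isError) {
      println(s"ERROR Lost executor $executorId: ${reason.message}")
    } else {
      println(s"INFO Executor $executorId removed: ${reason.message}")
    }
  }

  executorLost("1", LossReason("remote Rpc client disassociated"))   // unexpected loss
  executorLost("2", LossReason("manually killed", isError = false))  // dynamic allocation kill
}
```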
@@ -287,9 +287,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
   }
 
   // Called by subclasses when notified of a lost worker
-  def removeExecutor(executorId: String, reason: String) {
+  def removeExecutor(executorId: String, reason: String, isError: Boolean = true) {
     try {
-      driverEndpoint.askWithRetry[Boolean](RemoveExecutor(executorId, reason))
+      driverEndpoint.askWithRetry[Boolean](RemoveExecutor(executorId, reason, isError))
     } catch {
       case e: Exception =>
         throw new SparkException("Error notifying standalone scheduler's driver endpoint", e)
@@ -369,24 +369,47 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
    * Request that the cluster manager kill the specified executors.
    * Return whether the kill request is acknowledged.
    */
-  final override def killExecutors(executorIds: Seq[String]): Boolean = synchronized {
-    logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}")
-    val filteredExecutorIds = new ArrayBuffer[String]
-    executorIds.foreach { id =>
-      if (executorDataMap.contains(id)) {
-        filteredExecutorIds += id
-      } else {
-        logWarning(s"Executor to kill $id does not exist!")
-      }
-    }
+  final override def killExecutors(executorIds: Seq[String]): Boolean = {
+    val executorIdsToKill = new ArrayBuffer[String]
+
+    // New target total after all pending requests have been granted
+    val newTargetTotal = synchronized {
+      if (executorIds.size == 1) {
+        logInfo(s"Requesting cluster manager to kill executor ${executorIds.head}.")
+      } else if (executorIds.size > 1) {
+        logInfo(s"Requesting cluster manager to kill executors ${executorIds.mkString(", ")}")
+      } else {
+        // No executors to kill
+        return false
+      }
+
+      executorIds.foreach { id =>
+        if (executorDataMap.contains(id)) {
+          executorIdsToKill += id
+        } else {
+          logWarning(s"Executor to kill $id does not exist!")
+        }
+      }
+
+      executorsPendingToRemove ++= executorIdsToKill
+
+      numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size
Contributor (author) comment: @vanzin I believe these two lines are functionally equivalent to what you had in the old code (L384). I believe the only difference is that I moved the […] What it used to look like: […]
+    }
+
+    // Killing executors means effectively that we want fewer executors than before, so also
+    // update the target number of executors to avoid having the backend allocate new ones.
+    // We should do this outside of the synchronized block to avoid holding on to a lock while
+    // awaiting a reply.
+    val acked = doRequestTotalExecutors(newTargetTotal) && doKillExecutors(executorIdsToKill)
+    if (acked) {
+      // Remove executors from various data structures in advance
+      // so we do not generate a bunch of unexpected error messages
+      executorIdsToKill.foreach { id =>
+        removeExecutor(id, "manually killed", isError = false)
+      }
+    }
-    // Killing executors means effectively that we want less executors than before, so also update
-    // the target number of executors to avoid having the backend allocate new ones.
-    val newTotal = (numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size
-      - filteredExecutorIds.size)
-    doRequestTotalExecutors(newTotal)
-
-    executorsPendingToRemove ++= filteredExecutorIds
-    doKillExecutors(filteredExecutorIds)
+
+    acked
   }
 
   /**
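On the author's comment above about equivalence: the old code subtracted the newly requested kills (filteredExecutorIds.size) separately, while the new code first folds them into executorsPendingToRemove and then subtracts that set's size once. Assuming the executors being killed were not already pending removal, the two expressions agree; the snippet below just checks that arithmetic with made-up counts.

```scala
object TargetTotalSketch extends App {
  // Illustrative numbers, not taken from the PR.
  val numExisting = 10          // registered executors
  val numPending = 2            // requested executors not yet granted
  val alreadyPendingRemove = 1  // executors already marked for removal
  val toKillNow = 3             // executors this call wants to kill

  // Old formula: subtract the new kills separately.
  val oldNewTotal = numExisting + numPending - alreadyPendingRemove - toKillNow
  // New formula: fold the new kills into the pending-to-remove set first.
  val newTargetTotal = numExisting + numPending - (alreadyPendingRemove + toKillNow)

  assert(oldNewTotal == newTargetTotal)  // both are 8
  println(newTargetTotal)
}
```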
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you make
SystemClockas a default value to save one constructor, please?
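For reference, the reviewer's suggestion would look roughly like a default argument in place of the auxiliary def this(sc: SparkContext) constructor. The snippet below is a stand-alone sketch with stand-in types, not a patch to this PR; the real change would use Spark's Clock and SystemClock and the actual HeartbeatReceiver signature.

```scala
// Stand-in types so the sketch compiles on its own; the real code would use
// org.apache.spark.util.{Clock, SystemClock}.
trait Clock { def getTimeMillis(): Long }
class SystemClock extends Clock { def getTimeMillis(): Long = System.currentTimeMillis() }

// Default argument instead of an auxiliary constructor: production callers write
// `new ReceiverSketch(name)`, while tests pass their own deterministic clock.
class ReceiverSketch(name: String, clock: Clock = new SystemClock) {
  def now(): Long = clock.getTimeMillis()
}

object DefaultClockDemo extends App {
  val production = new ReceiverSketch("heartbeat")           // uses SystemClock by default
  val inTest = new ReceiverSketch("heartbeat", new Clock {   // fixed clock for assertions
    def getTimeMillis(): Long = 42L
  })
  println(s"${production.now()} ${inTest.now()}")
}
```

One trade-off worth noting: a Scala default argument is not visible to Java callers, whereas the auxiliary constructor in the diff is; for a private[spark] class constructed only from Scala, either form works.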