Commit b93c37f
[SPARK-19276][CORE] Fetch Failure handling robust to user error handling
Fault-tolerance in Spark requires special handling of shuffle fetch failures: the Executor catches FetchFailedException and sends a special message back to the driver so that the lost map output can be regenerated. However, intervening user code can intercept that exception and wrap it with something else; this even happens in SparkSQL. So rather than inspecting the thrown exception directly, we store the fetch failure in the TaskContext, where user code can't touch it. This includes a test case which failed before the fix.
1 parent d06172b commit b93c37f
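
To make the failure mode concrete: the kind of user code that triggers this problem looks roughly like the sketch below (illustrative only, not part of the patch; it mirrors the FetchFailureHidingRDD added in the test suite). Consuming the shuffle iterator inside a try/catch wraps Spark's FetchFailedException in a plain RuntimeException, so the Executor's existing `case ffe: FetchFailedException` arm never matches and the driver never learns that map output was lost.

import org.apache.spark.{SparkConf, SparkContext}

// Illustrative user program (not part of the patch): the catch block below
// swallows *any* exception raised while pulling shuffle blocks -- including
// FetchFailedException -- and rethrows it wrapped in a RuntimeException.
object FetchFailureSwallowingJob {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("swallow-demo"))
    val sums = sc.parallelize(1 to 1000, 8)
      .map(i => (i % 10, i))
      .reduceByKey(_ + _)          // introduces a shuffle; reading it can throw FetchFailedException
      .mapPartitions { iter =>
        try {
          Iterator(iter.size)      // consuming the iterator is what actually fetches remote blocks
        } catch {
          case e: Exception =>
            throw new RuntimeException("record processing failed", e)  // hides the fetch failure
        }
      }
      .collect()
    println(sums.toSeq)
    sc.stop()
  }
}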

File tree

6 files changed: +167, -26 lines

core/src/main/scala/org/apache/spark/TaskContext.scala
core/src/main/scala/org/apache/spark/TaskContextImpl.scala
core/src/main/scala/org/apache/spark/executor/Executor.scala
core/src/main/scala/org/apache/spark/scheduler/Task.scala
core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala
core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala

core/src/main/scala/org/apache/spark/TaskContext.scala

Lines changed: 7 additions & 0 deletions
@@ -24,6 +24,7 @@ import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.memory.TaskMemoryManager
 import org.apache.spark.metrics.source.Source
+import org.apache.spark.shuffle.FetchFailedException
 import org.apache.spark.util.{AccumulatorV2, TaskCompletionListener, TaskFailureListener}
 
 
@@ -190,4 +191,10 @@ abstract class TaskContext extends Serializable {
    */
   private[spark] def registerAccumulator(a: AccumulatorV2[_, _]): Unit
 
+  /**
+   * Record that this task has failed due to a fetch failure from a remote host. This allows
+   * fetch-failure handling to get triggered by the driver, regardless of intervening user-code.
+   */
+  private[spark] def setFetchFailed(fetchFailed: FetchFailedException): Unit
+
 }

core/src/main/scala/org/apache/spark/TaskContextImpl.scala

Lines changed: 7 additions & 0 deletions
@@ -26,6 +26,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.memory.TaskMemoryManager
 import org.apache.spark.metrics.MetricsSystem
 import org.apache.spark.metrics.source.Source
+import org.apache.spark.shuffle.FetchFailedException
 import org.apache.spark.util._
 
 private[spark] class TaskContextImpl(
@@ -56,6 +57,8 @@ private[spark] class TaskContextImpl(
   // Whether the task has failed.
   @volatile private var failed: Boolean = false
 
+  var fetchFailed: Option[FetchFailedException] = None
+
   override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
     onCompleteCallbacks += listener
     this
@@ -126,4 +129,8 @@ private[spark] class TaskContextImpl(
     taskMetrics.registerAccumulator(a)
   }
 
+  private[spark] override def setFetchFailed(fetchFailed: FetchFailedException): Unit = {
+    this.fetchFailed = Some(fetchFailed)
+  }
+
 }

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 17 additions & 0 deletions
@@ -148,6 +148,8 @@ private[spark] class Executor(
 
   startDriverHeartbeater()
 
+  private[executor] def numRunningTasks: Int = runningTasks.size()
+
   def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
     val tr = new TaskRunner(context, taskDescription)
     runningTasks.put(taskDescription.taskId, tr)
@@ -340,6 +342,14 @@ private[spark] class Executor(
             }
           }
         }
+        task.context.fetchFailed.foreach { fetchFailure =>
+          // Uh-oh. It appears the user code has caught the fetch failure without throwing any
+          // other exceptions. It's *possible* this is what the user meant to do (though highly
+          // unlikely), so we will log an error and keep going.
+          logError(s"TID ${taskId} completed successfully though internally it encountered " +
+            s"unrecoverable fetch failures! Most likely this means user code is incorrectly " +
+            s"swallowing Spark's internal exceptions", fetchFailure)
+        }
         val taskFinish = System.currentTimeMillis()
         val taskFinishCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
           threadMXBean.getCurrentThreadCpuTime
@@ -405,6 +415,13 @@ private[spark] class Executor(
           setTaskFinishedAndClearInterruptStatus()
           execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
 
+        case t: Throwable if task.context.fetchFailed.isDefined =>
+          // There was a fetch failure in the task, but some user code wrapped that exception
+          // and threw something else. Regardless, we treat it as a fetch failure.
+          val reason = task.context.fetchFailed.get.toTaskFailedReason
+          setTaskFinishedAndClearInterruptStatus()
+          execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
+
         case _: TaskKilledException =>
          logInfo(s"Executor killed $taskName (TID $taskId)")
          setTaskFinishedAndClearInterruptStatus()
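
The ordering of the new catch arm matters: the guard on `task.context.fetchFailed` has to come before the generic failure handling, otherwise a wrapped fetch failure would be reported as an ordinary exception and the map stage would never be resubmitted. A minimal, standalone sketch of that control flow follows; the classes and helpers here are illustrative stand-ins, not Spark's own types.

object CatchOrderSketch {
  class FetchFailedException(msg: String) extends Exception(msg)

  sealed trait TaskFailedReason
  case class FetchFailed(msg: String) extends TaskFailedReason
  case class ExceptionFailure(msg: String) extends TaskFailedReason

  // Mirrors the shape of TaskRunner.run()'s catch block: the guarded case runs
  // before the catch-all, so a wrapped fetch failure is still reported as FetchFailed.
  def runAndReport(
      body: () => Unit,
      fetchFailed: () => Option[FetchFailedException]): TaskFailedReason = {
    try {
      body()
      ExceptionFailure("task succeeded; no failure reason")
    } catch {
      case ffe: FetchFailedException =>
        FetchFailed(ffe.getMessage)                      // thrown directly
      case _: Throwable if fetchFailed().isDefined =>
        FetchFailed(fetchFailed().get.getMessage)        // wrapped by user code
      case t: Throwable =>
        ExceptionFailure(t.getMessage)                   // everything else
    }
  }

  def main(args: Array[String]): Unit = {
    var recorded: Option[FetchFailedException] = None    // stand-in for TaskContextImpl.fetchFailed
    val wrappedFailure = () => {
      val ffe = new FetchFailedException("block fetch failed")
      recorded = Some(ffe)                               // what setFetchFailed does
      throw new RuntimeException("user wrapper", ffe)    // what intervening user code does
    }
    println(runAndReport(wrappedFailure, () => recorded))
  }
}

Running the sketch prints FetchFailed(block fetch failed) even though the task body actually threw a RuntimeException.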

core/src/main/scala/org/apache/spark/scheduler/Task.scala

Lines changed: 4 additions & 6 deletions
@@ -17,19 +17,15 @@
 
 package org.apache.spark.scheduler
 
-import java.io.{DataInputStream, DataOutputStream}
 import java.nio.ByteBuffer
 import java.util.Properties
 
-import scala.collection.mutable
-import scala.collection.mutable.HashMap
-
 import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.internal.config.APP_CALLER_CONTEXT
 import org.apache.spark.memory.{MemoryMode, TaskMemoryManager}
 import org.apache.spark.metrics.MetricsSystem
-import org.apache.spark.serializer.SerializerInstance
+import org.apache.spark.shuffle.FetchFailedException
 import org.apache.spark.util._
 
 /**
@@ -137,6 +133,8 @@ private[spark] abstract class Task[T](
         memoryManager.synchronized { memoryManager.notifyAll() }
       }
     } finally {
+      // though we unset the ThreadLocal here, the context itself is still queried directly
+      // in the TaskRunner to check for FetchFailedExceptions
      TaskContext.unset()
     }
   }
@@ -156,7 +154,7 @@ private[spark] abstract class Task[T](
   var epoch: Long = -1
 
   // Task context, to be initialized in run().
-  @transient protected var context: TaskContextImpl = _
+  @transient var context: TaskContextImpl = _
 
   // The actual Thread on which the task is running, if any. Initialized in run().
   @volatile @transient private var taskThread: Thread = _

core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala

Lines changed: 7 additions & 1 deletion
@@ -17,7 +17,7 @@
 
 package org.apache.spark.shuffle
 
-import org.apache.spark.{FetchFailed, TaskFailedReason}
+import org.apache.spark.{FetchFailed, TaskContext, TaskFailedReason}
 import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.Utils
 
@@ -45,6 +45,12 @@ private[spark] class FetchFailedException(
     this(bmAddress, shuffleId, mapId, reduceId, cause.getMessage, cause)
   }
 
+  // SPARK-19276. We set the fetch failure in the task context, so that even if there is user code
+  // which intercepts this exception (possibly wrapping it), the Executor can still tell there was
+  // a fetch failure, and send the correct error message back to the driver. The TaskContext won't
+  // be defined if this is run on the driver (just in test cases) -- we can safely ignore then.
+  Option(TaskContext.get()).map(_.setFetchFailed(this))
+
   def toTaskFailedReason: TaskFailedReason = FetchFailed(bmAddress, shuffleId, mapId, reduceId,
     Utils.exceptionString(this))
 }
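
The constructor-side call above is the crux of the fix: the exception records itself in the task's context the moment it is constructed, before any user catch block can hide it. Below is a standalone sketch of the same pattern, with a plain ThreadLocal standing in for Spark's TaskContext; all names in it are illustrative.

object SelfRegisteringFailureSketch {
  // Stand-in for TaskContext.get()/setFetchFailed: a per-thread slot that the
  // framework can inspect after the task body returns or throws.
  private val recordedFailure = new ThreadLocal[Option[CriticalFailure]] {
    override def initialValue(): Option[CriticalFailure] = None
  }

  class CriticalFailure(msg: String) extends Exception(msg) {
    // Side effect in the constructor, like FetchFailedException registering
    // itself via Option(TaskContext.get()).map(_.setFetchFailed(this)).
    recordedFailure.set(Some(this))
  }

  def main(args: Array[String]): Unit = {
    try {
      try throw new CriticalFailure("remote fetch failed")
      catch { case e: Exception => throw new RuntimeException("wrapped by user code", e) }
    } catch {
      case _: Throwable =>
        // The framework still sees the original failure, regardless of the wrapping.
        assert(recordedFailure.get().isDefined)
        println(s"detected: ${recordedFailure.get().get.getMessage}")
    } finally {
      recordedFailure.remove()
    }
  }
}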

core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala

Lines changed: 125 additions & 19 deletions
@@ -23,29 +23,34 @@ import java.util.concurrent.CountDownLatch
 
 import scala.collection.mutable.Map
 
-import org.mockito.Matchers._
-import org.mockito.Mockito.{mock, when}
+import org.mockito.ArgumentCaptor
+import org.mockito.Matchers.{any, eq => meq}
+import org.mockito.Mockito.{inOrder, when}
 import org.mockito.invocation.InvocationOnMock
 import org.mockito.stubbing.Answer
+import org.scalatest.mock.MockitoSugar
 
 import org.apache.spark._
 import org.apache.spark.TaskState.TaskState
 import org.apache.spark.memory.MemoryManager
 import org.apache.spark.metrics.MetricsSystem
+import org.apache.spark.rdd.RDD
 import org.apache.spark.rpc.RpcEnv
-import org.apache.spark.scheduler.{FakeTask, TaskDescription}
+import org.apache.spark.scheduler.{FakeTask, ResultTask, TaskDescription}
 import org.apache.spark.serializer.JavaSerializer
+import org.apache.spark.shuffle.FetchFailedException
+import org.apache.spark.storage.BlockManagerId
 
-class ExecutorSuite extends SparkFunSuite {
+class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar {
 
   test("SPARK-15963: Catch `TaskKilledException` correctly in Executor.TaskRunner") {
     // mock some objects to make Executor.launchTask() happy
     val conf = new SparkConf
     val serializer = new JavaSerializer(conf)
-    val mockEnv = mock(classOf[SparkEnv])
-    val mockRpcEnv = mock(classOf[RpcEnv])
-    val mockMetricsSystem = mock(classOf[MetricsSystem])
-    val mockMemoryManager = mock(classOf[MemoryManager])
+    val mockEnv = mock[SparkEnv]
+    val mockRpcEnv = mock[RpcEnv]
+    val mockMetricsSystem = mock[MetricsSystem]
+    val mockMemoryManager = mock[MemoryManager]
     when(mockEnv.conf).thenReturn(conf)
     when(mockEnv.serializer).thenReturn(serializer)
     when(mockEnv.rpcEnv).thenReturn(mockRpcEnv)
@@ -55,16 +60,7 @@ class ExecutorSuite extends SparkFunSuite {
     val fakeTaskMetrics = serializer.newInstance().serialize(TaskMetrics.registered).array()
     val serializedTask = serializer.newInstance().serialize(
       new FakeTask(0, 0, Nil, fakeTaskMetrics))
-    val taskDescription = new TaskDescription(
-      taskId = 0,
-      attemptNumber = 0,
-      executorId = "",
-      name = "",
-      index = 0,
-      addedFiles = Map[String, Long](),
-      addedJars = Map[String, Long](),
-      properties = new Properties,
-      serializedTask)
+    val taskDescription = fakeTaskDescription(serializedTask)
 
     // we use latches to force the program to run in this order:
     // +-----------------------------+---------------------------------------+
@@ -86,7 +82,7 @@ class ExecutorSuite extends SparkFunSuite {
 
     val executorSuiteHelper = new ExecutorSuiteHelper
 
-    val mockExecutorBackend = mock(classOf[ExecutorBackend])
+    val mockExecutorBackend = mock[ExecutorBackend]
     when(mockExecutorBackend.statusUpdate(any(), any(), any()))
       .thenAnswer(new Answer[Unit] {
         var firstTime = true
@@ -133,6 +129,116 @@ class ExecutorSuite extends SparkFunSuite {
       }
     }
   }
+
+  test("SPARK-19276: Handle Fetch Failed for all intervening user code") {
+    val conf = new SparkConf().setMaster("local").setAppName("executor suite test")
+    val sc = new SparkContext(conf)
+
+    val serializer = SparkEnv.get.closureSerializer.newInstance()
+    val resultFunc = (context: TaskContext, itr: Iterator[Int]) => itr.size
+    val inputRDD = new FakeShuffleRDD(sc)
+    val secondRDD = new FetchFailureHidingRDD(sc, inputRDD)
+    val taskBinary = sc.broadcast(serializer.serialize((secondRDD, resultFunc)).array())
+    val serializedTaskMetrics = serializer.serialize(TaskMetrics.registered).array()
+    val task = new ResultTask(
+      stageId = 1,
+      stageAttemptId = 0,
+      taskBinary = taskBinary,
+      partition = secondRDD.partitions(0),
+      locs = Seq(),
+      outputId = 0,
+      localProperties = new Properties(),
+      serializedTaskMetrics = serializedTaskMetrics
+    )
+
+    val serTask = serializer.serialize(task)
+    val taskDescription = fakeTaskDescription(serTask)
+
+    val mockBackend = mock[ExecutorBackend]
+    var executor: Executor = null
+    try {
+      executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true)
+      executor.launchTask(mockBackend, taskDescription)
+      val startTime = System.currentTimeMillis()
+      val maxTime = startTime + 5000
+      while (executor.numRunningTasks > 0 && System.currentTimeMillis() < maxTime) {
+        Thread.sleep(10)
+      }
+      val orderedMock = inOrder(mockBackend)
+      val statusCaptor = ArgumentCaptor.forClass(classOf[ByteBuffer])
+      orderedMock.verify(mockBackend)
+        .statusUpdate(meq(0L), meq(TaskState.RUNNING), statusCaptor.capture())
+      orderedMock.verify(mockBackend)
+        .statusUpdate(meq(0L), meq(TaskState.FAILED), statusCaptor.capture())
+      // first statusUpdate for RUNNING has empty data
+      assert(statusCaptor.getAllValues().get(0).remaining() === 0)
+      // second update is more interesting
+      val failureData = statusCaptor.getAllValues.get(1)
+      val failReason = serializer.deserialize[TaskFailedReason](failureData)
+      assert(failReason.isInstanceOf[FetchFailed])
+    } finally {
+      if (executor != null) {
+        executor.stop()
+      }
+    }
+  }
+
+  private def fakeTaskDescription(serializedTask: ByteBuffer): TaskDescription = {
+    new TaskDescription(
+      taskId = 0,
+      attemptNumber = 0,
+      executorId = "",
+      name = "",
+      index = 0,
+      addedFiles = Map[String, Long](),
+      addedJars = Map[String, Long](),
+      properties = new Properties,
+      serializedTask)
+  }
+
+}
+
+class FakeShuffleRDD(sc: SparkContext) extends RDD[Int](sc, Nil) {
+  override def compute(split: Partition, context: TaskContext): Iterator[Int] = {
+    new Iterator[Int] {
+      override def hasNext: Boolean = true
+      override def next(): Int = {
+        throw new FetchFailedException(
+          bmAddress = BlockManagerId("1", "hostA", 1234),
+          shuffleId = 0,
+          mapId = 0,
+          reduceId = 0,
+          message = "fake fetch failure"
+        )
+      }
+    }
+  }
+  override protected def getPartitions: Array[Partition] = {
+    Array(new SimplePartition)
+  }
+}
+
+class SimplePartition extends Partition {
+  override def index: Int = 0
+}
+
+class FetchFailureHidingRDD(
+    sc: SparkContext,
+    val input: FakeShuffleRDD) extends RDD[Int](input) {
+  override def compute(split: Partition, context: TaskContext): Iterator[Int] = {
+    val inItr = input.compute(split, context)
+    try {
+      Iterator(inItr.size)
+    } catch {
+      case t: Throwable =>
+        throw new RuntimeException("User Exception that hides the original exception", t)
+    }
+  }
+
+  override protected def getPartitions: Array[Partition] = {
+    Array(new SimplePartition)
+  }
 }
 
 // Helps to test("SPARK-15963")
