@@ -453,8 +453,9 @@ abstract class RDD[T: ClassTag](
       val distributePartition = (index: Int, items: Iterator[T]) => {
         var position = new Random(hashing.byteswap32(index)).nextInt(numPartitions)
         // TODO Enable insert a local sort before shuffle to make input data sequence
-        // deterministic, thus the config
-        // "spark.shuffle.recomputeAllPartitionsOnRepartitionFailure" can be disabled.
+        // deterministic, to avoid retrying all partitions on FetchFailure. However, performing
+        // a local sort before the shuffle may increase the execution time of repartition()
+        // significantly (for some large inputs it can cost 3x ~ 5x the time).
         items.map { t =>
           // Note that the hash code of the key will just be the key itself. The HashPartitioner
           // will mod it with the number of total partitions.
@@ -464,11 +465,11 @@ abstract class RDD[T: ClassTag](
       }: Iterator[(Int, T)]
 
       // include a shuffle step so that our upstream tasks are still distributed
-      val recomputeOnFailure =
-        conf.getBoolean("spark.shuffle.recomputeAllPartitionsOnRepartitionFailure", true)
       new CoalescedRDD(
         new ShuffledRDD[Int, T, T](
-          mapPartitionsWithIndex(distributePartition, recomputeOnFailure),
+          mapPartitionsWithIndexInternal(
+            distributePartition,
+            retryOnAllPartitionsOnFailure = true),
           new HashPartitioner(numPartitions)),
         numPartitions,
         partitionCoalescer).values
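
The round-robin assignment above is the crux of SPARK-23207: each task seeds a Random with its partition index and deals records out in arrival order, so a retried task that sees the same records in a different order will send them to different output partitions. The sketch below is a Spark-free illustration (the `assign` helper is not Spark code, and it inlines the modulo that Spark delegates to HashPartitioner); it shows two runs over the same records in different orders producing different assignments:

import scala.util.Random
import scala.util.hashing.byteswap32

object RoundRobinSketch {
  // Illustrative mirror of distributePartition: seeded start, then round-robin.
  def assign[T](index: Int, items: Iterator[T], numPartitions: Int): Iterator[(Int, T)] = {
    var position = new Random(byteswap32(index)).nextInt(numPartitions)
    items.map { t =>
      position = (position + 1) % numPartitions
      (position, t)
    }
  }

  def main(args: Array[String]): Unit = {
    val run1 = assign(0, Iterator("a", "b", "c", "d"), 2).toList
    // A retry of the same task that happens to see the input in a different order:
    val run2 = assign(0, Iterator("b", "a", "c", "d"), 2).toList
    println(run1) // "a" and "b" land in different partitions than in run2, so
    println(run2) // recomputing only some output partitions loses or duplicates records.
  }
}
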
@@ -815,14 +816,19 @@ abstract class RDD[T: ClassTag](
    * @param preservesPartitioning indicates whether the input function preserves the partitioner,
    * which should be `false` unless this is a pair RDD and the input function doesn't modify
    * the keys.
+   *
+   * @param retryOnAllPartitionsOnFailure indicates whether to recompute all partitions on
+   * failure recovery, which should be `false` unless the output is repartitioned.
    */
   private[spark] def mapPartitionsWithIndexInternal[U: ClassTag](
       f: (Int, Iterator[T]) => Iterator[U],
-      preservesPartitioning: Boolean = false): RDD[U] = withScope {
+      preservesPartitioning: Boolean = false,
+      retryOnAllPartitionsOnFailure: Boolean = false): RDD[U] = withScope {
     new MapPartitionsRDD(
       this,
       (context: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter),
-      preservesPartitioning)
+      preservesPartitioning,
+      retryOnAllPartitionsOnFailure)
   }
 
   /**
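
The hunk above forwards the new flag into MapPartitionsRDD's constructor. How the flag travels from there to the scheduler is not shown in this diff; the following is a hedged sketch of one plausible wiring, in which MapPartitionsRDD overrides the recomputeAllPartitionsOnFailure() hook defined at the bottom of this file. The fourth constructor parameter matches this diff, but the override is an assumption, not the verbatim Spark source:

package org.apache.spark.rdd

import scala.reflect.ClassTag
import org.apache.spark.{Partition, TaskContext}

// Sketch only: the override below is an assumption about how the flag
// reaches the scheduler; it is not taken from this commit.
private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
    var prev: RDD[T],
    f: (TaskContext, Int, Iterator[T]) => Iterator[U],
    preservesPartitioning: Boolean = false,
    retryOnAllPartitionsOnFailure: Boolean = false)
  extends RDD[U](prev) {

  override val partitioner =
    if (preservesPartitioning) firstParent[T].partitioner else None

  override def getPartitions: Array[Partition] = firstParent[T].partitions

  override def compute(split: Partition, context: TaskContext): Iterator[U] =
    f(context, split.index, firstParent[T].iterator(split, context))

  // Surface the flag via the hook documented at the end of RDD.scala.
  override private[spark] def recomputeAllPartitionsOnFailure(): Boolean =
    retryOnAllPartitionsOnFailure
}
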
@@ -843,21 +849,15 @@ abstract class RDD[T: ClassTag](
    *
    * `preservesPartitioning` indicates whether the input function preserves the partitioner, which
    * should be `false` unless this is a pair RDD and the input function doesn't modify the keys.
-   *
-   * `recomputeOnFailure` indicates whether to recompute on all the partitions on failure recovery,
-   * which should be `false` unless the output is not sorted or not sortable, and the output is
-   * repartitioned.
    */
   def mapPartitionsWithIndex[U: ClassTag](
       f: (Int, Iterator[T]) => Iterator[U],
-      preservesPartitioning: Boolean = false,
-      recomputeOnFailure: Boolean = false): RDD[U] = withScope {
+      preservesPartitioning: Boolean = false): RDD[U] = withScope {
     val cleanedF = sc.clean(f)
     new MapPartitionsRDD(
       this,
       (context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(index, iter),
-      preservesPartitioning,
-      recomputeOnFailure)
+      preservesPartitioning)
   }
 
   /**
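
With the extra parameter removed, the public overload returns to its familiar two-argument shape. A quick, self-contained usage example against this stable API (the object name, app name, and data are illustrative):

import org.apache.spark.sql.SparkSession

object MapPartitionsWithIndexExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("mpwi").getOrCreate()
    val rdd = spark.sparkContext.parallelize(1 to 10, numSlices = 4)

    // Tag each element with the index of the partition that produced it. This is
    // not a pair RDD, so the default preservesPartitioning = false is correct.
    val tagged = rdd.mapPartitionsWithIndex(
      (index, iter) => iter.map(x => s"partition $index -> $x"))

    tagged.collect().foreach(println)
    spark.stop()
  }
}
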
@@ -1853,14 +1853,18 @@ abstract class RDD[T: ClassTag](
   }
 
   /**
-   * Whether or not the RDD is required to recompute all partitions on failure. Repartition on an
-   * RDD performs in a round-robin manner, thus there may be data correctness issue if only a
-   * sub-set of partitions are recomputed on failure and the input data sequence is not
+   * Whether or not the RDD is required to recompute all partitions on FetchFailure. Repartition
+   * on an RDD proceeds in a round-robin manner, so there may be a data correctness issue if only
+   * a subset of partitions is recomputed on FetchFailure and the input data sequence is not
    * deterministic. Please refer to SPARK-23207 and SPARK-23243 for related discussion.
    *
-   * Require to recompute all partitions on failure if repartition operation is called on this RDD
-   * and the result sequence of this RDD is not deterministic (or the data type of the output of
-   * this RDD is not sortable).
+   * Ideally we would not need to recompute all partitions on FetchFailure when the result
+   * sequence of an RDD is deterministic, but sources outside Spark's control can make the
+   * sequence non-deterministic (e.g. reads from an external data source, or different
+   * spill-and-merge patterns under memory pressure). We cannot afford the performance hit of
+   * inserting a local sort before the shuffle (3x ~ 5x the time of repartition()), and the
+   * data type of an RDD may not even be sortable. We therefore compromise and simply
+   * recompute all partitions on FetchFailure whenever repartition is called on an RDD.
    */
   private[spark] def recomputeAllPartitionsOnFailure(): Boolean = false
 }
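
The local sort dismissed above is easy to sketch: sorting each partition before the round-robin assignment makes the output independent of arrival order, but it buffers and sorts every partition and requires an Ordering, which is exactly the 3x ~ 5x overhead and the sortability constraint the comment cites. A minimal illustration (the helper name is hypothetical, and the fixed start position stands in for Spark's seeded Random):

object LocalSortSketch {
  // Deterministic variant of distributePartition: sort locally, then round-robin.
  def sortedDistribute[T: Ordering](index: Int, items: Iterator[T],
      numPartitions: Int): Iterator[(Int, T)] = {
    var position = index % numPartitions // fixed start for brevity
    items.toSeq.sorted.iterator.map { t =>
      position = (position + 1) % numPartitions
      (position, t)
    }
  }

  def main(args: Array[String]): Unit = {
    val run1 = sortedDistribute(0, Iterator("b", "a", "c"), 2).toList
    val run2 = sortedDistribute(0, Iterator("a", "c", "b"), 2).toList
    println(run1 == run2) // true: assignment no longer depends on arrival order
  }
}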