diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index e72b9cb694eb..49f24dfbd826 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -737,7 +737,7 @@ package object config {
         "application ends.")
       .version("3.3.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   private[spark] val SHUFFLE_SERVICE_FETCH_RDD_ENABLED =
     ConfigBuilder(Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 5d635011d2ec..7aec8eeaad42 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -936,6 +936,7 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext {
     val newConf = new SparkConf
     newConf.set("spark.shuffle.push.enabled", "true")
     newConf.set("spark.shuffle.service.enabled", "true")
+    newConf.set("spark.shuffle.service.removeShuffle", "false")
     newConf.set(SERIALIZER, "org.apache.spark.serializer.KryoSerializer")
     newConf.set(IS_TESTING, true)
 
diff --git a/docs/configuration.md b/docs/configuration.md
index 9a686bc514c5..f2a885be6fc8 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1152,7 +1152,7 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.shuffle.service.removeShuffle</code></td>
-  <td>false</td>
+  <td>true</td>
   <td>
     Whether to use the ExternalShuffleService for deleting shuffle blocks for
     deallocated executors when the shuffle is no longer needed. Without this enabled,
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index e64597b52ccb..f42dfadb2a2a 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -40,6 +40,8 @@ license: |
 
 - Since Spark 4.0, Spark uses `~/.ivy2.5.2` as Ivy user directory by default to isolate the existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`.
 
+- Since Spark 4.0, Spark uses the external shuffle service for deleting shuffle blocks for deallocated executors when the shuffle is no longer needed. To restore the legacy behavior, you can set `spark.shuffle.service.removeShuffle` to `false`.
+
 ## Upgrading from Core 3.4 to 3.5
 
 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.