@@ -462,6 +462,9 @@ private[spark] object SparkHadoopUtil {
    for ((key, value) <- conf.getAll if key.startsWith("spark.hadoop.")) {
      hadoopConf.set(key.substring("spark.hadoop.".length), value)
    }
    if (conf.getOption("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version").isEmpty) {
      hadoopConf.set("mapreduce.fileoutputcommitter.algorithm.version", "1")
    }
  }

  private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
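For context on what this hunk means for users: spark.hadoop.* properties on the SparkConf are still copied into the Hadoop Configuration with the prefix stripped, and the committer algorithm version is forced to "1" only when that key is absent. A minimal sketch of explicitly opting back into version 2 from application code (the app name and output path are illustrative, not part of this PR):

    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession

    // Setting the key explicitly overrides the injected default, because the
    // new check in SparkHadoopUtil only fires when the key is unset.
    val conf = new SparkConf()
      .setAppName("committer-v2-example")  // illustrative
      .set("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2")

    val spark = SparkSession.builder().config(conf).getOrCreate()
    // Writes from this session use file output committer algorithm v2, which may
    // be faster but can hit the correctness issue tracked in MAPREDUCE-7282.
    spark.range(10).write.mode("overwrite").parquet("/tmp/committer-v2-demo")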
10 changes: 2 additions & 8 deletions docs/configuration.md
@@ -1761,16 +1761,10 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td><code>spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version</code></td>
<td>Dependent on environment</td>
<td>1</td>
<td>
The file output committer algorithm version, valid algorithm version number: 1 or 2.
Version 2 may have better performance, but version 1 may handle failures better in certain situations,
as per <a href="https://issues.apache.org/jira/browse/MAPREDUCE-4815">MAPREDUCE-4815</a>.
The default value depends on the Hadoop version used in an environment:
1 for Hadoop versions lower than 3.0
2 for Hadoop versions 3.0 and higher
It's important to note that this can change back to 1 again in the future once <a href="https://issues.apache.org/jira/browse/MAPREDUCE-7282">MAPREDUCE-7282</a>
is fixed and merged.
Comment on lines -1767 to -1773
Just curious, why is this deleted? It is a very comprehensive comment about the Hadoop version background. @dongjoon-hyun

Member Author

This PR aims to provide a consistent view for Apache Spark users. For example, "The default value depends on the Hadoop version used in an environment" is no longer valid. After this PR, Apache Spark users will consistently get v1 by default.

Note that version 2 may cause a correctness issue like MAPREDUCE-7282.
</td>
<td>2.2.0</td>
</tr>
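A quick way to confirm which algorithm version is in effect after this change is to read it back from the Hadoop configuration that Spark builds from the SparkConf (a sketch; the app name is illustrative):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("committer-version-check")  // illustrative
      .getOrCreate()

    // sparkContext.hadoopConfiguration already has spark.hadoop.* keys applied
    // (prefix stripped) plus the injected default, so with no explicit override
    // this should now print "1" regardless of the Hadoop version in use.
    val version = spark.sparkContext.hadoopConfiguration
      .get("mapreduce.fileoutputcommitter.algorithm.version")
    println(s"file output committer algorithm version = $version")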