@@ -23,7 +23,7 @@ import scala.collection.mutable
 
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapreduce._
-import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter, FileOutputFormat}
+import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter, FileOutputFormat}
 import org.apache.hadoop.util.Shell
 import parquet.hadoop.util.ContextUtil
 
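The rename above is ordinary Scala import aliasing: the mapreduce FileOutputCommitter is bound to the name MapReduceFileOutputCommitter so that later code can refer to it without clashing with other committer types. A minimal sketch of the technique; the usage line is assumed for illustration:

    import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter}

    // Only the alias is in scope; the simple name FileOutputCommitter stays free,
    // so pattern matches and isAssignableFrom checks elsewhere can name the
    // mapreduce committer unambiguously.
    val committerClass: Class[_] = classOf[MapReduceFileOutputCommitter]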
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.RunnableCommand
-import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
+import org.apache.spark.sql.{SQLConf, DataFrame, SQLContext, SaveMode}
 
 private[sql] case class InsertIntoDataSource(
     logicalRelation: LogicalRelation,
@@ -287,24 +287,39 @@ private[sql] abstract class BaseWriterContainer(
   protected def getWorkPath: String = {
     outputCommitter match {
       // FileOutputCommitter writes to a temporary location returned by `getWorkPath`.
-      case f: FileOutputCommitter => f.getWorkPath.toString
+      case f: MapReduceFileOutputCommitter => f.getWorkPath.toString
       case _ => outputPath
     }
   }
 
   private def newOutputCommitter(context: TaskAttemptContext): OutputCommitter = {
     val committerClass = context.getConfiguration.getClass(
-      "mapred.output.committer.class", null, classOf[OutputCommitter])
+      SQLConf.OUTPUT_COMMITTER_CLASS, null, classOf[OutputCommitter])
 
     Option(committerClass).map { clazz =>
-      val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
-      ctor.newInstance(new Path(outputPath), context)
+      // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat
+      // has an associated output committer. To override that committer, we first
+      // try to use the one set in SQLConf.OUTPUT_COMMITTER_CLASS. If a data source
+      // needs to override the output committer, it should set the committer in its
+      // prepareForWrite method.
+      if (classOf[MapReduceFileOutputCommitter].isAssignableFrom(clazz)) {
+        // The specified output committer is a FileOutputCommitter, so we use the
+        // constructor that FileOutputCommitter expects.
+        val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
+        ctor.newInstance(new Path(outputPath), context)
+      } else {
+        // The specified output committer is a plain OutputCommitter, so we use the
+        // no-argument constructor.
+        val ctor = clazz.getDeclaredConstructor()
+        ctor.newInstance()
+      }
     }.getOrElse {
+      // If no output committer class is set, use the one associated with the file
+      // output format.
       outputFormatClass.newInstance().getOutputCommitter(context)
     }
   }
 
-
   private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = {
     this.jobId = SparkHadoopWriter.createJobID(new Date, jobId)
     this.taskId = new TaskID(this.jobId, true, splitId)
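The reflection in newOutputCommitter lets users plug in committers that lack the (Path, TaskAttemptContext) constructor FileOutputCommitter subclasses expect. A minimal sketch of such a committer, assuming a hypothetical DirectOutputCommitter that skips task commits; only the SQLConf.OUTPUT_COMMITTER_CLASS key itself comes from this patch:

    import org.apache.hadoop.mapreduce.{JobContext, OutputCommitter, TaskAttemptContext}

    // Hypothetical committer with a no-argument constructor, so newOutputCommitter
    // instantiates it through the clazz.getDeclaredConstructor() branch above.
    class DirectOutputCommitter extends OutputCommitter {
      override def setupJob(jobContext: JobContext): Unit = {}
      override def setupTask(taskContext: TaskAttemptContext): Unit = {}
      override def needsTaskCommit(taskContext: TaskAttemptContext): Boolean = false
      override def commitTask(taskContext: TaskAttemptContext): Unit = {}
      override def abortTask(taskContext: TaskAttemptContext): Unit = {}
    }

    // Selecting it (hadoopConf is an assumed org.apache.hadoop.conf.Configuration):
    // hadoopConf.set(SQLConf.OUTPUT_COMMITTER_CLASS, classOf[DirectOutputCommitter].getName)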