From b9320c8d8a3735d4569709b51a0d66a7121e23cb Mon Sep 17 00:00:00 2001 From: yucai Date: Tue, 17 Jul 2018 18:20:18 +0800 Subject: [PATCH] [SPARK-24832][SQL] Improve inputMetrics's bytesRead update for ColumnarBatch --- .../spark/sql/execution/datasources/FileScanRDD.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 28c36b6020d3..a46ad6624a05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -114,11 +114,13 @@ class FileScanRDD( // don't need to run this `if` for every record. if (nextElement.isInstanceOf[ColumnarBatch]) { inputMetrics.incRecordsRead(nextElement.asInstanceOf[ColumnarBatch].numRows()) + updateBytesRead() } else { inputMetrics.incRecordsRead(1) - } - if (inputMetrics.recordsRead % SparkHadoopUtil.UPDATE_INPUT_METRICS_INTERVAL_RECORDS == 0) { - updateBytesRead() + if (inputMetrics.recordsRead % + SparkHadoopUtil.UPDATE_INPUT_METRICS_INTERVAL_RECORDS == 0) { + updateBytesRead() + } } nextElement }