From 81fe383d4f1189c3a4a7bae32f8ca38d123e6d7d Mon Sep 17 00:00:00 2001 From: yucai Date: Fri, 26 Oct 2018 13:40:29 +0800 Subject: [PATCH 1/8] BuiltInDataSourceWriteBenchmark --- .../spark/benchmark/BenchmarkBase.scala | 2 + .../benchmarks/AvroWriteBenchmark-results.txt | 10 +++ .../benchmark/AvroWriteBenchmark.scala | 24 +++---- ...uiltInDataSourceWriteBenchmark-results.txt | 60 ++++++++++++++++ .../BuiltInDataSourceWriteBenchmark.scala | 68 +++++++------------ .../benchmark/DataSourceWriteBenchmark.scala | 15 +--- 6 files changed, 107 insertions(+), 72 deletions(-) create mode 100644 external/avro/benchmarks/AvroWriteBenchmark-results.txt create mode 100644 sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 89e927e5784d..9f95cbd57616 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -24,6 +24,7 @@ import java.io.{File, FileOutputStream, OutputStream} */ abstract class BenchmarkBase { var output: Option[OutputStream] = None + var mainArgs: Array[String] = _ /** * Main process of the whole benchmark. 
@@ -41,6 +42,7 @@ abstract class BenchmarkBase { } def main(args: Array[String]): Unit = { + mainArgs = args val regenerateBenchmarkFiles: Boolean = System.getenv("SPARK_GENERATE_BENCHMARK_FILES") == "1" if (regenerateBenchmarkFiles) { val resultFileName = s"${this.getClass.getSimpleName.replace("$", "")}-results.txt" diff --git a/external/avro/benchmarks/AvroWriteBenchmark-results.txt b/external/avro/benchmarks/AvroWriteBenchmark-results.txt new file mode 100644 index 000000000000..93d94bffadf4 --- /dev/null +++ b/external/avro/benchmarks/AvroWriteBenchmark-results.txt @@ -0,0 +1,10 @@ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Output Single Int Column 2287 / 2295 6.9 145.4 1.0X +Output Single Double Column 2494 / 2538 6.3 158.6 0.9X +Output Int and String Column 5555 / 5587 2.8 353.2 0.4X +Output Partitions 3928 / 4157 4.0 249.7 0.6X +Output Buckets 5374 / 5441 2.9 341.7 0.4X + diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala index df13b4a1c2d3..e6cf4d66b39b 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala @@ -19,22 +19,16 @@ package org.apache.spark.sql.execution.benchmark /** * Benchmark to measure Avro data sources write performance. - * Usage: - * 1. with spark-submit: bin/spark-submit --class - * 2. with sbt: build/sbt "avro/test:runMain " + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. 
generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain " + * Results will be written to "benchmarks/AvroWriteBenchmark-results.txt". + * }}} */ object AvroWriteBenchmark extends DataSourceWriteBenchmark { - def main(args: Array[String]): Unit = { - /* - Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Output Single Int Column 2481 / 2499 6.3 157.8 1.0X - Output Single Double Column 2705 / 2710 5.8 172.0 0.9X - Output Int and String Column 5539 / 5639 2.8 352.2 0.4X - Output Partitions 4613 / 5004 3.4 293.3 0.5X - Output Buckets 5554 / 5561 2.8 353.1 0.4X - */ - runBenchmark("Avro") + override def runBenchmarkSuite(): Unit = { + runDataSourceBenchmark("Avro") } } diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt new file mode 100644 index 000000000000..162783850332 --- /dev/null +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -0,0 +1,60 @@ +================================================================================================ +Parquet writer benchmark +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Parquet writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Output Single Int Column 2049 / 2120 7.7 130.2 1.0X +Output Single Double Column 2194 / 2203 7.2 139.5 0.9X +Output Int and String Column 5704 / 5715 2.8 362.7 0.4X +Output Partitions 3727 / 3856 4.2 237.0 0.5X +Output Buckets 5119 / 5361 3.1 325.4 0.4X + + 
+================================================================================================ +ORC writer benchmark +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +ORC writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Output Single Int Column 1109 / 1116 14.2 70.5 1.0X +Output Single Double Column 1366 / 1378 11.5 86.8 0.8X +Output Int and String Column 5303 / 5318 3.0 337.2 0.2X +Output Partitions 3078 / 3472 5.1 195.7 0.4X +Output Buckets 4374 / 4398 3.6 278.1 0.3X + + +================================================================================================ +JSON writer benchmark +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +JSON writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Output Single Int Column 1806 / 1813 8.7 114.8 1.0X +Output Single Double Column 2530 / 2538 6.2 160.8 0.7X +Output Int and String Column 5311 / 5344 3.0 337.6 0.3X +Output Partitions 3524 / 3580 4.5 224.1 0.5X +Output Buckets 4661 / 4723 3.4 296.3 0.4X + + +================================================================================================ +CSV writer benchmark +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +CSV writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Output Single Int Column 2863 / 2896 5.5 182.0 1.0X +Output Single Double Column 3233 / 3238 4.9 205.6 0.9X +Output Int and String Column 6805 / 6822 2.3 432.6 0.4X +Output Partitions 4873 / 4886 3.2 309.8 0.6X +Output Buckets 6733 / 6757 2.3 428.1 0.4X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala index 2de516c19da9..274c43d6bc30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala @@ -18,62 +18,40 @@ package org.apache.spark.sql.execution.benchmark /** * Benchmark to measure built-in data sources write performance. - * By default it measures 4 data source format: Parquet, ORC, JSON, CSV. Run it with spark-submit: - * spark-submit --class - * Or with sbt: - * build/sbt "sql/test:runMain " * - * To measure specified formats, run it with arguments: - * spark-submit --class format1 [format2] [...] - * Or with sbt: - * build/sbt "sql/test:runMain format1 [format2] [...]" + * {{{ + * To run this benchmark: + * By default it measures 4 data source format: Parquet, ORC, JSON, CSV. + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/BuiltInDataSourceWriteBenchmark-results.txt". + * + * To measure specified formats, run it with arguments. + * 1. without sbt: + * bin/spark-submit --class format1 [format2] [...] + * 2. build/sbt "sql/test:runMain format1 [format2] [...]" + * 3. 
generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt + * "sql/test:runMain format1 [format2] [...]" + * Results will be written to "benchmarks/BuiltInDataSourceWriteBenchmark-results.txt". + * }}} + * */ object BuiltInDataSourceWriteBenchmark extends DataSourceWriteBenchmark { - def main(args: Array[String]): Unit = { - val formats: Seq[String] = if (args.isEmpty) { + override def runBenchmarkSuite(): Unit = { + val formats: Seq[String] = if (mainArgs.isEmpty) { Seq("Parquet", "ORC", "JSON", "CSV") } else { - args + mainArgs } spark.conf.set("spark.sql.parquet.compression.codec", "snappy") spark.conf.set("spark.sql.orc.compression.codec", "snappy") - /* - Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - Parquet writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Output Single Int Column 1815 / 1932 8.7 115.4 1.0X - Output Single Double Column 1877 / 1878 8.4 119.3 1.0X - Output Int and String Column 6265 / 6543 2.5 398.3 0.3X - Output Partitions 4067 / 4457 3.9 258.6 0.4X - Output Buckets 5608 / 5820 2.8 356.6 0.3X - - ORC writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Output Single Int Column 1201 / 1239 13.1 76.3 1.0X - Output Single Double Column 1542 / 1600 10.2 98.0 0.8X - Output Int and String Column 6495 / 6580 2.4 412.9 0.2X - Output Partitions 3648 / 3842 4.3 231.9 0.3X - Output Buckets 5022 / 5145 3.1 319.3 0.2X - - JSON writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Output Single Int Column 1988 / 2093 7.9 126.4 1.0X - Output Single Double Column 2854 / 2911 5.5 181.4 0.7X - Output Int and String Column 6467 / 6653 2.4 411.1 0.3X - Output Partitions 4548 / 5055 3.5 289.1 0.4X - Output Buckets 5664 / 
5765 2.8 360.1 0.4X - CSV writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Output Single Int Column 3025 / 3190 5.2 192.3 1.0X - Output Single Double Column 3575 / 3634 4.4 227.3 0.8X - Output Int and String Column 7313 / 7399 2.2 464.9 0.4X - Output Partitions 5105 / 5190 3.1 324.6 0.6X - Output Buckets 6986 / 6992 2.3 444.1 0.4X - */ formats.foreach { format => - runBenchmark(format) + runBenchmark(s"$format writer benchmark") { + runDataSourceBenchmark(format) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala index 994d6b5b7d33..405d60794ede 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala @@ -16,18 +16,9 @@ */ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.internal.SQLConf -trait DataSourceWriteBenchmark { - val conf = new SparkConf() - .setAppName("DataSourceWriteBenchmark") - .setIfMissing("spark.master", "local[1]") - .set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") - - val spark = SparkSession.builder.config(conf).getOrCreate() +trait DataSourceWriteBenchmark extends SqlBasedBenchmark { val tempTable = "temp" val numRows = 1024 * 1024 * 15 @@ -75,7 +66,7 @@ trait DataSourceWriteBenchmark { } } - def runBenchmark(format: String): Unit = { + def runDataSourceBenchmark(format: String): Unit = { val tableInt = "tableInt" val tableDouble = "tableDouble" val tableIntString = "tableIntString" @@ -84,7 +75,7 @@ trait DataSourceWriteBenchmark { withTempTable(tempTable) { 
        spark.range(numRows).createOrReplaceTempView(tempTable) withTable(tableInt, tableDouble, tableIntString, tablePartition, tableBucket) { - val benchmark = new Benchmark(s"$format writer benchmark", numRows) + val benchmark = new Benchmark(s"$format writer benchmark", numRows, output = output) writeNumeric(tableInt, format, benchmark, "Int") writeNumeric(tableDouble, format, benchmark, "Double") writeIntString(tableIntString, format, benchmark) From 155a872d47dc8ebe63ee1e910d44b356d139a625 Mon Sep 17 00:00:00 2001 From: yucai Date: Mon, 29 Oct 2018 13:46:31 +0800 Subject: [PATCH 2/8] minor --- .../test/scala/org/apache/spark/benchmark/BenchmarkBase.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 9f95cbd57616..89e927e5784d 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -24,7 +24,6 @@ import java.io.{File, FileOutputStream, OutputStream} */ abstract class BenchmarkBase { var output: Option[OutputStream] = None - var mainArgs: Array[String] = _ /** * Main process of the whole benchmark. 
@@ -42,7 +41,6 @@ abstract class BenchmarkBase { } def main(args: Array[String]): Unit = { - mainArgs = args val regenerateBenchmarkFiles: Boolean = System.getenv("SPARK_GENERATE_BENCHMARK_FILES") == "1" if (regenerateBenchmarkFiles) { val resultFileName = s"${this.getClass.getSimpleName.replace("$", "")}-results.txt" From 22b4c50a73538a674e981be2d4c0caec98b3a573 Mon Sep 17 00:00:00 2001 From: yucai Date: Mon, 29 Oct 2018 13:45:02 +0800 Subject: [PATCH 3/8] [SPARK-25864][SQL][TEST] Make main args set correctly in BenchmarkBase --- .../test/scala/org/apache/spark/benchmark/BenchmarkBase.scala | 4 ++-- .../scala/org/apache/spark/serializer/KryoBenchmark.scala | 2 +- .../apache/spark/mllib/linalg/UDTSerializationBenchmark.scala | 2 +- .../src/test/scala/org/apache/spark/sql/HashBenchmark.scala | 2 +- .../scala/org/apache/spark/sql/HashByteArrayBenchmark.scala | 2 +- .../org/apache/spark/sql/UnsafeProjectionBenchmark.scala | 2 +- .../test/scala/org/apache/spark/sql/DatasetBenchmark.scala | 2 +- .../spark/sql/execution/benchmark/AggregateBenchmark.scala | 2 +- .../spark/sql/execution/benchmark/BloomFilterBenchmark.scala | 2 +- .../sql/execution/benchmark/DataSourceReadBenchmark.scala | 2 +- .../sql/execution/benchmark/FilterPushdownBenchmark.scala | 2 +- .../apache/spark/sql/execution/benchmark/JoinBenchmark.scala | 2 +- .../apache/spark/sql/execution/benchmark/MiscBenchmark.scala | 2 +- .../sql/execution/benchmark/PrimitiveArrayBenchmark.scala | 2 +- .../apache/spark/sql/execution/benchmark/RangeBenchmark.scala | 2 +- .../apache/spark/sql/execution/benchmark/SortBenchmark.scala | 2 +- .../sql/execution/benchmark/UnsafeArrayDataBenchmark.scala | 2 +- .../spark/sql/execution/benchmark/WideSchemaBenchmark.scala | 2 +- .../columnar/compression/CompressionSchemeBenchmark.scala | 2 +- .../sql/execution/vectorized/ColumnarBatchBenchmark.scala | 2 +- .../benchmark/ObjectHashAggregateExecBenchmark.scala | 2 +- .../org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala | 2 +- 22 
files changed, 23 insertions(+), 23 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 89e927e5784d..24e596e1ecda 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -30,7 +30,7 @@ abstract class BenchmarkBase { * Implementations of this method are supposed to use the wrapper method `runBenchmark` * for each benchmark scenario. */ - def runBenchmarkSuite(): Unit + def runBenchmarkSuite(mainArgs: Array[String]): Unit final def runBenchmark(benchmarkName: String)(func: => Any): Unit = { val separator = "=" * 96 @@ -51,7 +51,7 @@ abstract class BenchmarkBase { output = Some(new FileOutputStream(file)) } - runBenchmarkSuite() + runBenchmarkSuite(args) output.foreach { o => if (o != null) { diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala b/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala index 8a52c131af84..d7730f23da10 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala @@ -39,7 +39,7 @@ import org.apache.spark.serializer.KryoTest._ object KryoBenchmark extends BenchmarkBase { val N = 1000000 - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val name = "Benchmark Kryo Unsafe vs safe Serialization" runBenchmark(name) { val benchmark = new Benchmark(name, N, 10, output = output) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala index 6c1d58089867..5f19e466ecad 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala +++ 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder */ object UDTSerializationBenchmark extends BenchmarkBase { - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("VectorUDT de/serialization") { val iters = 1e2.toInt diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala index 4226ab3773fe..3b4b80daf084 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala @@ -102,7 +102,7 @@ object HashBenchmark extends BenchmarkBase { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val singleInt = new StructType().add("i", IntegerType) test("single ints", singleInt, 1 << 15, 1 << 14) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index 7dc865d85af0..dbfa7bb18aa6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -83,7 +83,7 @@ object HashByteArrayBenchmark extends BenchmarkBase { benchmark.run() } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Benchmark for MurMurHash 3 and xxHash64") { test(8, 42L, 1 << 10, 1 << 11) test(16, 42L, 1 << 10, 1 << 11) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/UnsafeProjectionBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/UnsafeProjectionBenchmark.scala index e7a99485cdf0..42a4cfc91f82 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/UnsafeProjectionBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/UnsafeProjectionBenchmark.scala @@ -41,7 +41,7 @@ object UnsafeProjectionBenchmark extends BenchmarkBase { (1 to numRows).map(_ => encoder.toRow(generator().asInstanceOf[Row]).copy()).toArray } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("unsafe projection") { val iters = 1024 * 16 val numRows = 1024 * 16 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala index e3df449b41f0..dba906f63aed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala @@ -256,7 +256,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { .getOrCreate() } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val numRows = 100000000 val numChains = 10 runBenchmark("Dataset Benchmark") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala index 86e0df2fea35..b7d28988274b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala @@ -44,7 +44,7 @@ import org.apache.spark.unsafe.map.BytesToBytesMap */ object AggregateBenchmark extends SqlBasedBenchmark { - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("aggregate without grouping") { val N = 500L << 22 codegenBenchmark("agg w/o group", N) { diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala index 2f3caca849cd..f727ebcf3fd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala @@ -80,7 +80,7 @@ object BloomFilterBenchmark extends SqlBasedBenchmark { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { writeBenchmark() readBenchmark() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index a1e7f9e36f4b..a1f51f8e5480 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -585,7 +585,7 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("SQL Single Numeric Column Scan") { Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType => numericScanBenchmark(1024 * 1024 * 15, dataType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index cf05ca336171..017b74aabff7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -198,7 +198,7 @@ object FilterPushdownBenchmark 
extends BenchmarkBase with SQLHelper { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Pushdown for many distinct value case") { withTempPath { dir => withTempTable("orcTable", "parquetTable") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index 7bad4cb927b4..ad81711a1394 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -164,7 +164,7 @@ object JoinBenchmark extends SqlBasedBenchmark { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Join Benchmark") { broadcastHashJoinLongKey() broadcastHashJoinLongKeyWithDuplicates() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala index 43380869fefe..c4662c8999e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala @@ -148,7 +148,7 @@ object MiscBenchmark extends SqlBasedBenchmark { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { filterAndAggregateWithoutGroup(500L << 22) limitAndAggregateWithoutGroup(500L << 20) sample(500 << 18) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala index 83edf73abfae..8b1c422e63a3 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala @@ -36,7 +36,7 @@ object PrimitiveArrayBenchmark extends BenchmarkBase { .config("spark.sql.autoBroadcastJoinThreshold", 1) .getOrCreate() - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Write primitive arrays in dataset") { writeDatasetArray(4) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala index a844e02dcba3..a9f873f9094b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala @@ -32,7 +32,7 @@ import org.apache.spark.benchmark.Benchmark */ object RangeBenchmark extends SqlBasedBenchmark { - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { import spark.implicits._ runBenchmark("range") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala index 9a54e2320b80..784438cd43eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala @@ -119,7 +119,7 @@ object SortBenchmark extends BenchmarkBase { benchmark.run() } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("radix sort") { sortBenchmark() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnsafeArrayDataBenchmark.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnsafeArrayDataBenchmark.scala index 79eaeab9c399..f582d844cdc4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnsafeArrayDataBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnsafeArrayDataBenchmark.scala @@ -194,7 +194,7 @@ object UnsafeArrayDataBenchmark extends BenchmarkBase { benchmark.run } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Benchmark UnsafeArrayData") { readUnsafeArray(10) writeUnsafeArray(10) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala index 124661986ca0..f4642e7d353e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala @@ -208,7 +208,7 @@ object WideSchemaBenchmark extends SqlBasedBenchmark { deleteTmpFiles() } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmarkWithDeleteTmpFiles("parsing large select expressions") { parsingLargeSelectExpressions() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala index 0f9079744a22..8ea20f28a37b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala @@ -233,7 +233,7 @@ object CompressionSchemeBenchmark extends BenchmarkBase with AllCompressionSchem 
runDecodeBenchmark("STRING Decode", iters, count, STRING, testData) } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Compression Scheme Benchmark") { bitEncodingBenchmark(1024) shortEncodingBenchmark(1024) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala index f311465e582a..953b3a67d976 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala @@ -443,7 +443,7 @@ object ColumnarBatchBenchmark extends BenchmarkBase { benchmark.run } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Int Read/Write") { intAccess(1024 * 40) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala index 50ee09678e2c..3226e3a5f318 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala @@ -212,7 +212,7 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { Column(approxPercentile.toAggregateExpression(isDistinct)) } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Hive UDAF vs Spark AF") { hiveUDAFvsSparkAF(2 << 15) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index 
870ad4818eb2..ec13288f759a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -336,7 +336,7 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { } } - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("SQL Single Numeric Column Scan") { Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType => numericScanBenchmark(1024 * 1024 * 15, dataType) From 83d21c231536ae191713d56b9fbe144895d60b52 Mon Sep 17 00:00:00 2001 From: yucai Date: Mon, 29 Oct 2018 13:52:09 +0800 Subject: [PATCH 4/8] minor --- .../spark/sql/execution/benchmark/AvroWriteBenchmark.scala | 2 +- .../execution/benchmark/BuiltInDataSourceWriteBenchmark.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala index e6cf4d66b39b..0118d6562bbf 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala @@ -28,7 +28,7 @@ package org.apache.spark.sql.execution.benchmark * }}} */ object AvroWriteBenchmark extends DataSourceWriteBenchmark { - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runDataSourceBenchmark("Avro") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala index 274c43d6bc30..47a8af76caf8 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala @@ -38,7 +38,7 @@ package org.apache.spark.sql.execution.benchmark * */ object BuiltInDataSourceWriteBenchmark extends DataSourceWriteBenchmark { - override def runBenchmarkSuite(): Unit = { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val formats: Seq[String] = if (mainArgs.isEmpty) { Seq("Parquet", "ORC", "JSON", "CSV") } else { From 74f6ef5405f51f682a51ed78d9a3f4775c41f670 Mon Sep 17 00:00:00 2001 From: yucai Date: Mon, 29 Oct 2018 23:18:27 +0800 Subject: [PATCH 5/8] minor --- .../spark/sql/execution/benchmark/AvroWriteBenchmark.scala | 5 +++-- .../benchmark/BuiltInDataSourceWriteBenchmark.scala | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala index 0118d6562bbf..3012ab3679ec 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala @@ -19,9 +19,10 @@ package org.apache.spark.sql.execution.benchmark /** * Benchmark to measure Avro data sources write performance. + * To run this benchmark: * {{{ - * To run this benchmark: - * 1. without sbt: bin/spark-submit --class + * 1. without sbt: bin/spark-submit --class + * --jars , * 2. build/sbt "sql/test:runMain " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain " * Results will be written to "benchmarks/AvroWriteBenchmark-results.txt". 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala index 47a8af76caf8..cd97324c997f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala @@ -18,11 +18,11 @@ package org.apache.spark.sql.execution.benchmark /** * Benchmark to measure built-in data sources write performance. - * + * To run this benchmark: * {{{ - * To run this benchmark: * By default it measures 4 data source format: Parquet, ORC, JSON, CSV. - * 1. without sbt: bin/spark-submit --class + * 1. without sbt: bin/spark-submit --class + * --jars , * 2. build/sbt "sql/test:runMain " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " * Results will be written to "benchmarks/BuiltInDataSourceWriteBenchmark-results.txt". From f06b2acd93a87e64a955b34cbf4fd2ca7d5faf00 Mon Sep 17 00:00:00 2001 From: yucai Date: Tue, 30 Oct 2018 10:36:45 +0800 Subject: [PATCH 6/8] address comments --- .../spark/sql/execution/benchmark/AvroWriteBenchmark.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala index 3012ab3679ec..91dd04d5d2c4 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala @@ -22,7 +22,9 @@ package org.apache.spark.sql.execution.benchmark * To run this benchmark: * {{{ * 1. 
without sbt: bin/spark-submit --class - * --jars , + * --jars , + * --packages org.apache.spark:spark-avro_: + * * 2. build/sbt "sql/test:runMain " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain " * Results will be written to "benchmarks/AvroWriteBenchmark-results.txt". From 93ca3be2d860516141ff74188d775c17c3afce78 Mon Sep 17 00:00:00 2001 From: yucai Date: Tue, 30 Oct 2018 10:53:23 +0800 Subject: [PATCH 7/8] minor --- .../spark/sql/execution/benchmark/AvroWriteBenchmark.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala index 91dd04d5d2c4..0b11434757c9 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala @@ -22,9 +22,9 @@ package org.apache.spark.sql.execution.benchmark * To run this benchmark: * {{{ * 1. without sbt: bin/spark-submit --class - * --jars , - * --packages org.apache.spark:spark-avro_: - * + * --jars ,, + * , + * * 2. build/sbt "sql/test:runMain " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain " * Results will be written to "benchmarks/AvroWriteBenchmark-results.txt". 
From cf8dbea663fba0aa9f7d31682274ba67caaefd7d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 31 Oct 2018 09:46:47 +0000 Subject: [PATCH 8/8] Update result --- .../benchmarks/AvroWriteBenchmark-results.txt | 14 ++--- ...uiltInDataSourceWriteBenchmark-results.txt | 56 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/external/avro/benchmarks/AvroWriteBenchmark-results.txt b/external/avro/benchmarks/AvroWriteBenchmark-results.txt index 93d94bffadf4..fb2a77333eec 100644 --- a/external/avro/benchmarks/AvroWriteBenchmark-results.txt +++ b/external/avro/benchmarks/AvroWriteBenchmark-results.txt @@ -1,10 +1,10 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Output Single Int Column 2287 / 2295 6.9 145.4 1.0X -Output Single Double Column 2494 / 2538 6.3 158.6 0.9X -Output Int and String Column 5555 / 5587 2.8 353.2 0.4X -Output Partitions 3928 / 4157 4.0 249.7 0.6X -Output Buckets 5374 / 5441 2.9 341.7 0.4X +Output Single Int Column 3213 / 3373 4.9 204.3 1.0X +Output Single Double Column 3313 / 3345 4.7 210.7 1.0X +Output Int and String Column 7303 / 7316 2.2 464.3 0.4X +Output Partitions 5309 / 5691 3.0 337.5 0.6X +Output Buckets 7031 / 7557 2.2 447.0 0.5X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt index 162783850332..9d656fc10dce 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -2,59 +2,59 @@ Parquet writer benchmark 
================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Parquet writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Output Single Int Column 2049 / 2120 7.7 130.2 1.0X -Output Single Double Column 2194 / 2203 7.2 139.5 0.9X -Output Int and String Column 5704 / 5715 2.8 362.7 0.4X -Output Partitions 3727 / 3856 4.2 237.0 0.5X -Output Buckets 5119 / 5361 3.1 325.4 0.4X +Output Single Int Column 2354 / 2438 6.7 149.7 1.0X +Output Single Double Column 2462 / 2485 6.4 156.5 1.0X +Output Int and String Column 8083 / 8100 1.9 513.9 0.3X +Output Partitions 5015 / 5027 3.1 318.8 0.5X +Output Buckets 6883 / 6887 2.3 437.6 0.3X ================================================================================================ ORC writer benchmark ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz ORC writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Output Single Int Column 1109 / 1116 14.2 70.5 1.0X -Output Single Double Column 1366 / 1378 11.5 86.8 0.8X -Output Int and String Column 5303 / 5318 3.0 337.2 0.2X -Output Partitions 3078 / 3472 5.1 195.7 0.4X -Output Buckets 4374 / 4398 3.6 278.1 0.3X +Output Single Int Column 1769 / 1789 8.9 112.4 1.0X +Output Single Double Column 1989 / 2009 7.9 126.5 0.9X +Output Int 
and String Column 7323 / 7400 2.1 465.6 0.2X +Output Partitions 4374 / 4381 3.6 278.1 0.4X +Output Buckets 6086 / 6104 2.6 386.9 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz JSON writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Output Single Int Column 1806 / 1813 8.7 114.8 1.0X -Output Single Double Column 2530 / 2538 6.2 160.8 0.7X -Output Int and String Column 5311 / 5344 3.0 337.6 0.3X -Output Partitions 3524 / 3580 4.5 224.1 0.5X -Output Buckets 4661 / 4723 3.4 296.3 0.4X +Output Single Int Column 2954 / 4085 5.3 187.8 1.0X +Output Single Double Column 3832 / 3837 4.1 243.6 0.8X +Output Int and String Column 9591 / 10336 1.6 609.8 0.3X +Output Partitions 4956 / 4994 3.2 315.1 0.6X +Output Buckets 6608 / 6676 2.4 420.1 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz CSV writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Output Single Int Column 2863 / 2896 5.5 182.0 1.0X -Output Single Double Column 3233 / 3238 4.9 205.6 0.9X -Output Int and 
String Column 6805 / 6822 2.3 432.6 0.4X -Output Partitions 4873 / 4886 3.2 309.8 0.6X -Output Buckets 6733 / 6757 2.3 428.1 0.4X +Output Single Int Column 4118 / 4125 3.8 261.8 1.0X +Output Single Double Column 4888 / 4891 3.2 310.8 0.8X +Output Int and String Column 9788 / 9872 1.6 622.3 0.4X +Output Partitions 6578 / 6640 2.4 418.2 0.6X +Output Buckets 9125 / 9171 1.7 580.2 0.5X