[SPARK-25676][SQL][TEST] Rename and refactor BenchmarkWideTable to use main method

yucai · Yucai Yu · dongjoon-hyun · dongjoon-hyun · commit 63ca4bbe7927 · 2018-11-06T15:40:56.000-08:00
## What changes were proposed in this pull request? Refactor BenchmarkWideTable to use main method. Generate benchmark result: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.WideTableBenchmark" ``` ## How was this patch tested? manual tests Closes #22823 from yucai/BenchmarkWideTable. Lead-authored-by: yucai <yyu1@ebay.com> Co-authored-by: Yucai Yu <yucai.yu@foxmail.com> Co-authored-by: Dongjoon Hyun <dongjoon@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
diff --git a/sql/core/benchmarks/WideTableBenchmark-results.txt b/sql/core/benchmarks/WideTableBenchmark-results.txt
@@ -0,0 +1,17 @@
+================================================================================================
+projection on wide table
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+projection on wide table:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+split threshold 10                          38932 / 39307          0.0       37128.1       1.0X
+split threshold 100                         31991 / 32556          0.0       30508.8       1.2X
+split threshold 1024                        10993 / 11041          0.1       10483.5       3.5X
+split threshold 2048                          8959 / 8998          0.1        8543.8       4.3X
+split threshold 4096                          8116 / 8134          0.1        7739.8       4.8X
+split threshold 8196                          8069 / 8098          0.1        7695.5       4.8X
+split threshold 65536                       57068 / 57339          0.0       54424.3       0.7X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Benchmark to measure performance for wide table.
+ * {{{
+ *   To run this benchmark:
+ *   1. without sbt: bin/spark-submit --class <this class>
+ *        --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/WideTableBenchmark-results.txt".
+ * }}}
+ */
+object WideTableBenchmark extends SqlBasedBenchmark {
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("projection on wide table") {
+      val N = 1 << 20
+      val df = spark.range(N)
+      val columns = (0 until 400).map{ i => s"id as id$i"}
+      val benchmark = new Benchmark("projection on wide table", N, output = output)
+      Seq("10", "100", "1024", "2048", "4096", "8192", "65536").foreach { n =>
+        benchmark.addCase(s"split threshold $n", numIters = 5) { iter =>
+          withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> n) {
+            df.selectExpr(columns: _*).foreach(identity(_))
+          }
+        }
+      }
+      benchmark.run()
+    }
+  }
+}