Skip to content

Commit 63ca4bb

Browse files
yucai, Yucai Yu, and dongjoon-hyun
committed
[SPARK-25676][SQL][TEST] Rename and refactor BenchmarkWideTable to use main method
## What changes were proposed in this pull request? Refactor BenchmarkWideTable to use main method. Generate benchmark result: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.WideTableBenchmark" ``` ## How was this patch tested? manual tests Closes #22823 from yucai/BenchmarkWideTable. Lead-authored-by: yucai <yyu1@ebay.com> Co-authored-by: Yucai Yu <yucai.yu@foxmail.com> Co-authored-by: Dongjoon Hyun <dongjoon@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent a241a15 commit 63ca4bb

File tree

3 files changed

+69
-52
lines changed

3 files changed

+69
-52
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
================================================================================================
2+
projection on wide table
3+
================================================================================================
4+
5+
OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
6+
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
7+
projection on wide table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
8+
------------------------------------------------------------------------------------------------
9+
split threshold 10 38932 / 39307 0.0 37128.1 1.0X
10+
split threshold 100 31991 / 32556 0.0 30508.8 1.2X
11+
split threshold 1024 10993 / 11041 0.1 10483.5 3.5X
12+
split threshold 2048 8959 / 8998 0.1 8543.8 4.3X
13+
split threshold 4096 8116 / 8134 0.1 7739.8 4.8X
14+
split threshold 8192 8069 / 8098 0.1 7695.5 4.8X
15+
split threshold 65536 57068 / 57339 0.0 54424.3 0.7X
16+
17+

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala

-52
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.execution.benchmark
19+
20+
import org.apache.spark.benchmark.Benchmark
21+
import org.apache.spark.sql.internal.SQLConf
22+
23+
/**
24+
* Benchmark to measure the performance of projections on a wide table.
25+
* {{{
26+
* To run this benchmark:
27+
* 1. without sbt: bin/spark-submit --class <this class>
28+
* --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
29+
* 2. build/sbt "sql/test:runMain <this class>"
30+
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
31+
* Results will be written to "benchmarks/WideTableBenchmark-results.txt".
32+
* }}}
33+
*/
34+
object WideTableBenchmark extends SqlBasedBenchmark {

  /**
   * Runs the "projection on wide table" benchmark.
   *
   * Builds a `1 << 20`-row range and selects 400 aliases of its `id` column
   * (`id0` .. `id399`). One benchmark case is registered per listed value of
   * `SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD`; each case applies that value via
   * `withSQLConf` and iterates over every row of the projection.
   *
   * @param mainArgs command-line arguments; not used by this suite
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    runBenchmark("projection on wide table") {
      val rowCount = 1 << 20
      val source = spark.range(rowCount)
      // 400 select expressions, each aliasing the same `id` column.
      val projections = (0 until 400).map(idx => s"id as id$idx")
      val wideBenchmark = new Benchmark("projection on wide table", rowCount, output = output)

      // Thresholds are kept as strings because they are passed directly as conf values.
      val thresholds = Seq("10", "100", "1024", "2048", "4096", "8192", "65536")
      for (threshold <- thresholds) {
        wideBenchmark.addCase(s"split threshold $threshold", numIters = 5) { _ =>
          withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> threshold) {
            source.selectExpr(projections: _*).foreach(identity(_))
          }
        }
      }

      wideBenchmark.run()
    }
  }
}

0 commit comments

Comments
 (0)