From 4339b1cbc5de7e54a7cd5be818fcf3dab249a351 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 7 Oct 2018 09:34:54 +0100 Subject: [PATCH 1/7] Refactor JoinBenchmark --- sql/core/benchmarks/JoinBenchmark-results.txt | 80 +++++++ .../execution/benchmark/JoinBenchmark.scala | 213 ++++++------------ 2 files changed, 154 insertions(+), 139 deletions(-) create mode 100644 sql/core/benchmarks/JoinBenchmark-results.txt diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt new file mode 100644 index 000000000000..cf6d83378c3c --- /dev/null +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -0,0 +1,80 @@ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Join w long wholestage off 4062 / 4709 5.2 193.7 1.0X +Join w long wholestage on 152 / 163 138.4 7.2 26.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Join w long duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Join w long duplicated wholestage off 3793 / 3801 5.5 180.9 1.0X +Join w long duplicated wholestage on 207 / 219 101.1 9.9 18.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Join w 2 ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Join w 2 ints wholestage off 138514 / 139178 0.2 6604.9 1.0X +Join w 2 ints wholestage on 129908 / 140869 0.2 6194.5 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Join w 2 longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Join w 2 longs wholestage off 5324 / 5369 3.9 253.8 1.0X +Join w 2 longs wholestage on 2463 / 2986 8.5 117.5 2.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Join w 2 longs duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Join w 2 longs duplicated wholestage off 11889 / 12665 1.8 566.9 1.0X +Join w 2 longs duplicated wholestage on 1798 / 1899 11.7 85.7 6.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +outer join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +outer join w long wholestage off 2865 / 3007 7.3 136.6 1.0X +outer join w long wholestage on 158 / 182 132.9 7.5 18.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +semi join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +semi join w long wholestage off 1803 / 2159 11.6 86.0 1.0X +semi join w long wholestage on 150 / 186 140.2 7.1 12.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +merge join wholestage off 865 / 887 2.4 412.7 1.0X +merge join wholestage on 622 / 690 3.4 296.7 1.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +sort merge join with duplicates: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +sort merge join with duplicates wholestage off 1431 / 1504 1.5 682.4 1.0X +sort merge join with duplicates wholestage on 1167 / 1240 1.8 556.3 1.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +shuffle hash join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +shuffle hash join wholestage off 1004 / 1027 4.2 239.4 1.0X +shuffle hash join wholestage on 786 / 802 5.3 187.5 1.3X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index 37744dccc06f..a6fc2574a9bf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -19,229 +19,164 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType /** * Benchmark to measure performance for aggregate primitives. - * To run this: - * build/sbt "sql/test-only *benchmark.JoinBenchmark" - * - * Benchmarks in this file are skipped in normal builds. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "sql/test:runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/JoinBenchmark-results.txt". + * }}} */ -class JoinBenchmark extends BenchmarkWithCodegen { +object JoinBenchmark extends SqlBasedBenchmark { - ignore("broadcast hash join, long key") { + def broadcastHashJoinLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim = broadcast(sparkSession.range(M).selectExpr("id as k", "cast(id as string) as v")) - runBenchmark("Join w long", N) { - val df = sparkSession.range(N).join(dim, (col("id") % M) === col("k")) + val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) + codegenBenchmark("Join w long", N) { + val df = spark.range(N).join(dim, (col("id") % M) === col("k")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - Join w long codegen=false 3002 / 3262 7.0 143.2 1.0X - Join w long codegen=true 321 / 371 65.3 15.3 9.3X - */ } - ignore("broadcast hash join, long key with duplicates") { + + def broadcastHashJoinLongKeyWithDuplicates(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim = broadcast(sparkSession.range(M).selectExpr("id as k", "cast(id as string) as v")) - runBenchmark("Join w long duplicated", N) { - val dim = broadcast(sparkSession.range(M).selectExpr("cast(id/10 as long) as k")) - val df = sparkSession.range(N).join(dim, (col("id") % M) === col("k")) + codegenBenchmark("Join w long duplicated", N) { + val dim = broadcast(spark.range(M).selectExpr("cast(id/10 as long) as k")) + val df = spark.range(N).join(dim, (col("id") % M) === col("k")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *Join w long duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *Join w long duplicated codegen=false 3446 / 3478 6.1 164.3 1.0X - *Join w long duplicated codegen=true 322 / 351 65.2 15.3 10.7X - */ } - ignore("broadcast hash join, two int key") { + def broadcastHashJoinTwoIntKey(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim2 = broadcast(sparkSession.range(M) + val dim2 = broadcast(spark.range(M) .selectExpr("cast(id as int) as k1", "cast(id as int) as k2", "cast(id as string) as v")) - runBenchmark("Join w 2 ints", N) { - val df = sparkSession.range(N).join(dim2, + codegenBenchmark("Join w 2 ints", N) { + val df = spark.range(N).join(dim2, (col("id") % M).cast(IntegerType) === col("k1") && (col("id") % M).cast(IntegerType) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *Join w 2 ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *Join w 2 ints codegen=false 4426 / 4501 4.7 211.1 1.0X - *Join w 2 ints codegen=true 791 / 818 26.5 37.7 5.6X - */ } - ignore("broadcast hash join, two long key") { + def broadcastHashJoinTwoLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim3 = broadcast(sparkSession.range(M) + val dim3 = broadcast(spark.range(M) .selectExpr("id as k1", "id as k2", "cast(id as string) as v")) - runBenchmark("Join w 2 longs", N) { - val df = sparkSession.range(N).join(dim3, + codegenBenchmark("Join w 2 longs", N) { + val df = spark.range(N).join(dim3, (col("id") % M) === col("k1") && (col("id") % M) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *Join w 2 longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *Join w 2 longs codegen=false 5905 / 6123 3.6 281.6 1.0X - *Join w 2 longs codegen=true 2230 / 2529 9.4 106.3 2.6X - */ } - ignore("broadcast hash join, two long key with duplicates") { + def broadcastHashJoinTwoLongKeyWithDuplicates(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim4 = broadcast(sparkSession.range(M) + val dim4 = broadcast(spark.range(M) .selectExpr("cast(id/10 as long) as k1", "cast(id/10 as long) as k2")) - runBenchmark("Join w 2 longs duplicated", N) { - val df = sparkSession.range(N).join(dim4, + codegenBenchmark("Join w 2 longs duplicated", N) { + val df = spark.range(N).join(dim4, (col("id") bitwiseAND M) === col("k1") && (col("id") bitwiseAND M) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *Join w 2 longs duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *Join w 2 longs duplicated codegen=false 6420 / 6587 3.3 306.1 1.0X - *Join w 2 longs duplicated codegen=true 2080 / 2139 10.1 99.2 3.1X - */ } - ignore("broadcast hash join, outer join long key") { + + def broadcastHashJoinOuterJoinLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim = broadcast(sparkSession.range(M).selectExpr("id as k", "cast(id as string) as v")) - runBenchmark("outer join w long", N) { - val df = sparkSession.range(N).join(dim, (col("id") % M) === col("k"), "left") + val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) + codegenBenchmark("outer join w long", N) { + val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "left") assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *outer join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *outer join w long codegen=false 3055 / 3189 6.9 145.7 1.0X - *outer join w long codegen=true 261 / 276 80.5 12.4 11.7X - */ } - ignore("broadcast hash join, semi join long key") { + + def broadcastHashJoinSemiJoinLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 - val dim = broadcast(sparkSession.range(M).selectExpr("id as k", "cast(id as string) as v")) - runBenchmark("semi join w long", N) { - val df = sparkSession.range(N).join(dim, (col("id") % M) === col("k"), "leftsemi") + val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) + codegenBenchmark("semi join w long", N) { + val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "leftsemi") assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() } - - /* - *Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5 - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *semi join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *semi join w long codegen=false 1912 / 1990 11.0 91.2 1.0X - *semi join w long codegen=true 237 / 244 88.3 11.3 8.1X - */ } - ignore("sort merge join") { + def sortMergeJoin(): Unit = { val N = 2 << 20 - runBenchmark("merge join", N) { - val df1 = sparkSession.range(N).selectExpr(s"id * 2 as k1") - val df2 = sparkSession.range(N).selectExpr(s"id * 3 as k2") + codegenBenchmark("merge join", N) { + val df1 = spark.range(N).selectExpr(s"id * 2 as k1") + val df2 = spark.range(N).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) df.count() } - - /* - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *merge join codegen=false 1588 / 1880 1.3 757.1 1.0X - *merge join codegen=true 1477 / 1531 1.4 704.2 1.1X - */ } - ignore("sort merge join with duplicates") { + def sortMergeJoinWithDuplicates(): Unit = { val N = 2 << 20 - runBenchmark("sort merge join", N) { - val df1 = sparkSession.range(N) + codegenBenchmark("sort merge join with duplicates", N) { + val df1 = spark.range(N) .selectExpr(s"(id * 15485863) % ${N*10} as k1") - val df2 = sparkSession.range(N) + val df2 = spark.range(N) .selectExpr(s"(id * 15485867) % ${N*10} as k2") val df = df1.join(df2, col("k1") === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) df.count() } - - /* - *Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - *sort merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *sort merge join codegen=false 3626 / 3667 0.6 1728.9 1.0X - *sort merge join codegen=true 3405 / 3438 0.6 1623.8 1.1X - */ } - ignore("shuffle hash join") { - val N = 4 << 20 - sparkSession.conf.set("spark.sql.shuffle.partitions", "2") - sparkSession.conf.set("spark.sql.autoBroadcastJoinThreshold", "10000000") - sparkSession.conf.set("spark.sql.join.preferSortMergeJoin", "false") - runBenchmark("shuffle hash join", N) { - val df1 = sparkSession.range(N).selectExpr(s"id as k1") - val df2 = sparkSession.range(N / 3).selectExpr(s"id * 3 as k2") - val df = df1.join(df2, col("k1") === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[ShuffledHashJoinExec]).isDefined) - df.count() + def shuffleHashJoin(): Unit = { + val N: Long = 4 << 20 + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "2", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10000000", + SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { + codegenBenchmark("shuffle hash join", N) { + val df1 = spark.range(N).selectExpr(s"id as k1") + val df2 = spark.range(N / 3).selectExpr(s"id * 3 as k2") + val df = df1.join(df2, col("k1") === col("k2")) + assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[ShuffledHashJoinExec]).isDefined) + df.count() + } } + } - /* - *Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Windows 7 6.1 - *Intel64 Family 6 Model 94 Stepping 3, GenuineIntel - *shuffle hash join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - *------------------------------------------------------------------------------------------- - *shuffle hash join codegen=false 2005 / 2010 2.1 478.0 1.0X - *shuffle hash join codegen=true 1773 / 1792 2.4 422.7 1.1X - */ + override def runBenchmarkSuite(): Unit = { + broadcastHashJoinLongKey() + broadcastHashJoinLongKeyWithDuplicates() + broadcastHashJoinTwoIntKey() + broadcastHashJoinTwoLongKey() + broadcastHashJoinTwoLongKeyWithDuplicates() + broadcastHashJoinOuterJoinLongKey() + broadcastHashJoinSemiJoinLongKey() + sortMergeJoin() + sortMergeJoinWithDuplicates() + shuffleHashJoin() } } From 4859a9f5e78edf81c211c304a57e2603e60b2cc7 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 7 Oct 2018 21:02:28 +0100 Subject: [PATCH 2/7] Put SQLConf.SHUFFLE_PARTITIONS.key to next line --- .../apache/spark/sql/execution/benchmark/JoinBenchmark.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index a6fc2574a9bf..ea6ea799cd60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -154,7 +154,8 @@ object JoinBenchmark extends SqlBasedBenchmark { def shuffleHashJoin(): Unit = { val N: Long = 4 << 20 - withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "2", + withSQLConf( + SQLConf.SHUFFLE_PARTITIONS.key -> "2", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10000000", SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { codegenBenchmark("shuffle hash join", N) { From 2baaf35a89d2cd5f70a0c21c05c392af7affb403 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 11 Oct 2018 15:43:08 +0800 Subject: [PATCH 3/7] Move broadcast outside codegenBenchmark --- .../spark/sql/execution/benchmark/JoinBenchmark.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index ea6ea799cd60..b795ff20f708 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -48,13 +48,11 @@ object JoinBenchmark extends SqlBasedBenchmark { } } - def broadcastHashJoinLongKeyWithDuplicates(): Unit = { val N = 20 << 20 val M = 1 << 16 - + val dim = broadcast(spark.range(M).selectExpr("cast(id/10 as long) as k")) codegenBenchmark("Join w long duplicated", N) { - val dim = broadcast(spark.range(M).selectExpr("cast(id/10 as long) as k")) val df = spark.range(N).join(dim, (col("id") % M) === col("k")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) df.count() @@ -104,7 +102,6 @@ object JoinBenchmark extends SqlBasedBenchmark { } } - def broadcastHashJoinOuterJoinLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 @@ -116,7 +113,6 @@ object JoinBenchmark extends SqlBasedBenchmark { } } - def broadcastHashJoinSemiJoinLongKey(): Unit = { val N = 20 << 20 val M = 1 << 16 From 00c495091dfdfb9f647c0e66307b4cc8ef2a19a3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 11 Oct 2018 19:02:14 +0800 Subject: [PATCH 4/7] rerun benchmark --- sql/core/benchmarks/JoinBenchmark-results.txt | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index cf6d83378c3c..42840e1f2004 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -3,78 +3,78 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long wholestage off 4062 / 4709 5.2 193.7 1.0X -Join w long wholestage on 152 / 163 138.4 7.2 26.8X +Join w long wholestage off 3200 / 3226 6.6 152.6 1.0X +Join w long wholestage on 223 / 261 94.2 10.6 14.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w long duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 3793 / 3801 5.5 180.9 1.0X -Join w long duplicated wholestage on 207 / 219 101.1 9.9 18.3X +Join w long duplicated wholestage off 3727 / 3745 5.6 177.7 1.0X +Join w long duplicated wholestage on 212 / 233 98.8 10.1 17.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 138514 / 139178 0.2 6604.9 1.0X -Join w 2 ints wholestage on 129908 / 140869 0.2 6194.5 1.1X +Join w 2 ints wholestage off 151556 / 152313 0.1 7226.8 1.0X +Join w 2 ints wholestage on 131599 / 138817 0.2 6275.1 1.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 5324 / 5369 3.9 253.8 1.0X -Join w 2 longs wholestage on 2463 / 2986 8.5 117.5 2.2X +Join w 2 longs wholestage off 5608 / 6035 3.7 267.4 1.0X +Join w 2 longs wholestage on 2256 / 2504 9.3 107.6 2.5X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 longs duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 11889 / 12665 1.8 566.9 1.0X -Join w 2 longs duplicated wholestage on 1798 / 1899 11.7 85.7 6.6X +Join w 2 longs duplicated wholestage off 10976 / 11120 1.9 523.4 1.0X +Join w 2 longs duplicated wholestage on 1781 / 1849 11.8 84.9 6.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz outer join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -outer join w long wholestage off 2865 / 3007 7.3 136.6 1.0X -outer join w long wholestage on 158 / 182 132.9 7.5 18.1X +outer join w long wholestage off 2436 / 2453 8.6 116.1 1.0X +outer join w long wholestage on 140 / 144 149.4 6.7 17.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz semi join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1803 / 2159 11.6 86.0 1.0X -semi join w long wholestage on 150 / 186 140.2 7.1 12.1X +semi join w long wholestage off 1610 / 1625 13.0 76.8 1.0X +semi join w long wholestage on 142 / 148 148.0 6.8 11.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -merge join wholestage off 865 / 887 2.4 412.7 1.0X -merge join wholestage on 622 / 690 3.4 296.7 1.4X +merge join wholestage off 864 / 910 2.4 412.0 1.0X +merge join wholestage on 611 / 617 3.4 291.4 1.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz sort merge join with duplicates: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1431 / 1504 1.5 682.4 1.0X -sort merge join with duplicates wholestage on 1167 / 1240 1.8 556.3 1.2X +sort merge join with duplicates wholestage off 1481 / 1597 1.4 706.2 1.0X +sort merge join with duplicates wholestage on 1192 / 1300 1.8 568.4 1.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz shuffle hash join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 1004 / 1027 4.2 239.4 1.0X -shuffle hash join wholestage on 786 / 802 5.3 187.5 1.3X +shuffle hash join wholestage off 1012 / 1034 4.1 241.2 1.0X +shuffle hash join wholestage on 799 / 861 5.2 190.5 1.3X From 3be13b16f1a59ffbd158265f54ad4f8d511d2018 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 12 Oct 2018 03:02:36 +0800 Subject: [PATCH 5/7] address comment --- sql/core/benchmarks/JoinBenchmark-results.txt | 45 ++++++++++--------- .../execution/benchmark/JoinBenchmark.scala | 24 +++++----- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index 42840e1f2004..3cca4202d9ff 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -1,80 +1,85 @@ +================================================================================================ +Join Benchmark +================================================================================================ + Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long wholestage off 3200 / 3226 6.6 152.6 1.0X -Join w long wholestage on 223 / 261 94.2 10.6 14.4X +Join w long wholestage off 3292 / 3308 6.4 157.0 1.0X +Join w long wholestage on 163 / 169 128.5 7.8 20.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w long duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 3727 / 3745 5.6 177.7 1.0X -Join w long duplicated wholestage on 212 / 233 98.8 10.1 17.6X +Join w long duplicated wholestage off 3842 / 3854 5.5 183.2 1.0X +Join w long duplicated wholestage on 212 / 228 99.0 10.1 18.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 151556 / 152313 0.1 7226.8 1.0X -Join w 2 ints wholestage on 131599 / 138817 0.2 6275.1 1.2X +Join w 2 ints wholestage off 157742 / 158892 0.1 7521.7 1.0X +Join w 2 ints wholestage on 134290 / 152917 0.2 6403.4 1.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 5608 / 6035 3.7 267.4 1.0X -Join w 2 longs wholestage on 2256 / 2504 9.3 107.6 2.5X +Join w 2 longs wholestage off 5578 / 6507 3.8 266.0 1.0X +Join w 2 longs wholestage on 2198 / 2638 9.5 104.8 2.5X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Join w 2 longs duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 10976 / 11120 1.9 523.4 1.0X -Join w 2 longs duplicated wholestage on 1781 / 1849 11.8 84.9 6.2X +Join w 2 longs duplicated wholestage off 11109 / 11125 1.9 529.7 1.0X +Join w 2 longs duplicated wholestage on 1743 / 1946 12.0 83.1 6.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz outer join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -outer join w long wholestage off 2436 / 2453 8.6 116.1 1.0X -outer join w long wholestage on 140 / 144 149.4 6.7 17.3X +outer join w long wholestage off 2508 / 2527 8.4 119.6 1.0X +outer join w long wholestage on 148 / 159 141.3 7.1 16.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz semi join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1610 / 1625 13.0 76.8 1.0X -semi join w long wholestage on 142 / 148 148.0 6.8 11.4X +semi join w long wholestage off 1662 / 1719 12.6 79.3 1.0X +semi join w long wholestage on 150 / 168 140.2 7.1 11.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -merge join wholestage off 864 / 910 2.4 412.0 1.0X -merge join wholestage on 611 / 617 3.4 291.4 1.4X +merge join wholestage off 819 / 833 2.6 390.5 1.0X +merge join wholestage on 591 / 619 3.5 281.7 1.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz sort merge join with duplicates: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1481 / 1597 1.4 706.2 1.0X -sort merge join with duplicates wholestage on 1192 / 1300 1.8 568.4 1.2X +sort merge join with duplicates wholestage off 1458 / 1497 1.4 695.3 1.0X +sort merge join with duplicates wholestage on 1129 / 1164 1.9 538.1 1.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz shuffle hash join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 1012 / 1034 4.1 241.2 1.0X -shuffle hash join wholestage on 799 / 861 5.2 190.5 1.3X +shuffle hash join wholestage off 959 / 970 4.4 228.7 1.0X +shuffle hash join wholestage on 817 / 850 5.1 194.9 1.2X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index b795ff20f708..001726510855 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType /** - * Benchmark to measure performance for aggregate primitives. + * Benchmark to measure performance for joins. * To run this benchmark: * {{{ * 1. without sbt: @@ -165,15 +165,17 @@ object JoinBenchmark extends SqlBasedBenchmark { } override def runBenchmarkSuite(): Unit = { - broadcastHashJoinLongKey() - broadcastHashJoinLongKeyWithDuplicates() - broadcastHashJoinTwoIntKey() - broadcastHashJoinTwoLongKey() - broadcastHashJoinTwoLongKeyWithDuplicates() - broadcastHashJoinOuterJoinLongKey() - broadcastHashJoinSemiJoinLongKey() - sortMergeJoin() - sortMergeJoinWithDuplicates() - shuffleHashJoin() + runBenchmark("Join Benchmark") { + broadcastHashJoinLongKey() + broadcastHashJoinLongKeyWithDuplicates() + broadcastHashJoinTwoIntKey() + broadcastHashJoinTwoLongKey() + broadcastHashJoinTwoLongKeyWithDuplicates() + broadcastHashJoinOuterJoinLongKey() + broadcastHashJoinSemiJoinLongKey() + sortMergeJoin() + sortMergeJoinWithDuplicates() + shuffleHashJoin() + } } } From 28f9b9a8a26caf8750aa2e8c8e2bc793b3773d98 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 12 Oct 2018 01:10:04 -0700 Subject: [PATCH 6/7] Updat result (#18) --- sql/core/benchmarks/JoinBenchmark-results.txt | 90 +++++++++---------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index 3cca4202d9ff..005ffc2158d0 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -2,84 +2,74 @@ Join Benchmark ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long wholestage off 3292 / 3308 6.4 157.0 1.0X -Join w long wholestage on 163 / 169 128.5 7.8 20.2X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Join w long wholestage off 4464 / 4483 4.7 212.9 1.0X +Join w long wholestage on 289 / 339 72.6 13.8 15.5X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Join w long duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 3842 / 3854 5.5 183.2 1.0X -Join w long duplicated wholestage on 212 / 228 99.0 10.1 18.1X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Join w long duplicated wholestage off 5662 / 5678 3.7 270.0 1.0X +Join w long duplicated wholestage on 332 / 345 63.1 15.8 17.0X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Join w 2 ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 157742 / 158892 0.1 7521.7 1.0X -Join w 2 ints wholestage on 134290 / 152917 0.2 6403.4 1.2X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Join w 2 ints wholestage off 173174 / 173183 0.1 8257.6 1.0X +Join w 2 ints wholestage on 166350 / 198362 0.1 7932.2 1.0X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Join w 2 longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 5578 / 6507 3.8 266.0 1.0X -Join w 2 longs wholestage on 2198 / 2638 9.5 104.8 2.5X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Join w 2 longs wholestage off 7055 / 7214 3.0 336.4 1.0X +Join w 2 longs wholestage on 1869 / 1985 11.2 89.1 3.8X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Join w 2 longs duplicated: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 11109 / 11125 1.9 529.7 1.0X -Join w 2 longs duplicated wholestage on 1743 / 1946 12.0 83.1 6.4X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Join w 2 longs duplicated wholestage off 19256 / 20283 1.1 918.2 1.0X +Join w 2 longs duplicated wholestage on 2467 / 2544 8.5 117.7 7.8X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz outer join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -outer join w long wholestage off 2508 / 2527 8.4 119.6 1.0X -outer join w long wholestage on 148 / 159 141.3 7.1 16.9X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +outer join w long wholestage off 3756 / 3761 5.6 179.1 1.0X +outer join w long wholestage on 218 / 250 96.2 10.4 17.2X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz semi join w long: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1662 / 1719 12.6 79.3 1.0X -semi join w long wholestage on 150 / 168 140.2 7.1 11.1X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +semi join w long wholestage off 2393 / 2416 8.8 114.1 1.0X +semi join w long wholestage on 214 / 218 97.9 10.2 11.2X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -merge join wholestage off 819 / 833 2.6 390.5 1.0X -merge join wholestage on 591 / 619 3.5 281.7 1.4X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +merge join wholestage off 2318 / 2392 0.9 1105.3 1.0X +merge join wholestage on 1669 / 1811 1.3 795.9 1.4X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz sort merge join with duplicates: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1458 / 1497 1.4 695.3 1.0X -sort merge join with duplicates wholestage on 1129 / 1164 1.9 538.1 1.3X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +sort merge join with duplicates wholestage off 2966 / 2976 0.7 1414.5 1.0X +sort merge join with duplicates wholestage on 2413 / 2641 0.9 1150.5 1.2X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz shuffle hash join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 959 / 970 4.4 228.7 1.0X -shuffle hash join wholestage on 817 / 850 5.1 194.9 1.2X +shuffle hash join wholestage off 1475 / 1479 2.8 351.7 1.0X +shuffle hash join wholestage on 1209 / 1238 3.5 288.3 1.2X From cd8b664e17ce613061cf046ee2b5c3f223c1afa7 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 12 Oct 2018 16:45:50 +0800 Subject: [PATCH 7/7] merge join -> sort merge join --- .../test/scala/org/apache/spark/benchmark/Benchmark.scala | 3 ++- sql/core/benchmarks/JoinBenchmark-results.txt | 6 +++--- .../spark/sql/execution/benchmark/JoinBenchmark.scala | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 7a36b5f02dc4..bb389cdb39df 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -200,11 +200,12 @@ private[spark] object Benchmark { def getProcessorName(): String = { val cpu = if (SystemUtils.IS_OS_MAC_OSX) { Utils.executeAndGetOutput(Seq("/usr/sbin/sysctl", "-n", "machdep.cpu.brand_string")) + .stripLineEnd } else if (SystemUtils.IS_OS_LINUX) { Try { val grepPath = Utils.executeAndGetOutput(Seq("which", "grep")).stripLineEnd Utils.executeAndGetOutput(Seq(grepPath, "-m", "1", "model name", "/proc/cpuinfo")) - .stripLineEnd.replaceFirst("model name[\\s*]:[\\s*]", "") + .stripLineEnd.replaceFirst("model name[\\s*]:[\\s*]", "") }.getOrElse("Unknown processor") } else { System.getenv("PROCESSOR_IDENTIFIER") diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index 005ffc2158d0..8ceb5e7a7fe9 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -53,10 +53,10 @@ semi join w long wholestage on 214 / 218 97.9 OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +sort merge join: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -merge join wholestage off 2318 / 2392 0.9 1105.3 1.0X -merge join wholestage on 1669 / 1811 1.3 795.9 1.4X +sort merge join wholestage off 2318 / 2392 0.9 1105.3 1.0X +sort merge join wholestage on 1669 / 1811 1.3 795.9 1.4X OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index 001726510855..7bad4cb927b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -126,7 +126,7 @@ object JoinBenchmark extends SqlBasedBenchmark { def sortMergeJoin(): Unit = { val N = 2 << 20 - codegenBenchmark("merge join", N) { + codegenBenchmark("sort merge join", N) { val df1 = spark.range(N).selectExpr(s"id * 2 as k1") val df2 = spark.range(N).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2"))