diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index 7ee185e9adc5..31ad7878ee64 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,145 +1,100 @@ -================================================================================================ -Extract -================================================================================================ - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -cast to timestamp wholestage off 407 432 36 24.6 40.7 1.0X -cast to timestamp wholestage on 348 396 80 28.7 34.8 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MILLENNIUM of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -MILLENNIUM of timestamp wholestage off 1407 1408 2 7.1 140.7 1.0X -MILLENNIUM of timestamp wholestage on 1334 1380 81 7.5 133.4 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -CENTURY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -CENTURY of timestamp wholestage off 1362 1364 3 7.3 136.2 1.0X -CENTURY of timestamp wholestage on 1334 1342 8 7.5 133.4 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DECADE of 
timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -DECADE of timestamp wholestage off 1226 1229 4 8.2 122.6 1.0X -DECADE of timestamp wholestage on 1218 1225 8 8.2 121.8 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -YEAR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -YEAR of timestamp wholestage off 1207 1210 4 8.3 120.7 1.0X -YEAR of timestamp wholestage on 1201 1216 17 8.3 120.1 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -ISOYEAR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ISOYEAR of timestamp wholestage off 1442 1446 6 6.9 144.2 1.0X -ISOYEAR of timestamp wholestage on 1315 1336 18 7.6 131.5 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -QUARTER of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -QUARTER of timestamp wholestage off 1443 1454 16 6.9 144.3 1.0X -QUARTER of timestamp wholestage on 1429 1442 9 7.0 142.9 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MONTH of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -MONTH of timestamp wholestage off 1196 1200 5 8.4 119.6 1.0X -MONTH of timestamp wholestage on 1192 1204 10 8.4 119.2 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -WEEK of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -WEEK of timestamp wholestage off 2103 2104 2 4.8 210.3 1.0X -WEEK of timestamp wholestage on 1798 1804 8 5.6 179.8 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DAY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -DAY of timestamp wholestage off 1211 1228 23 8.3 121.1 1.0X -DAY of timestamp wholestage on 1204 1212 6 8.3 120.4 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DAYOFWEEK of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -DAYOFWEEK of timestamp wholestage off 1387 1389 3 7.2 138.7 1.0X -DAYOFWEEK of timestamp wholestage on 1353 1360 8 7.4 135.3 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DOW of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -DOW of timestamp wholestage off 1373 1373 0 7.3 137.3 
1.0X -DOW of timestamp wholestage on 1361 1372 15 7.3 136.1 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -ISODOW of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ISODOW of timestamp wholestage off 1311 1366 77 7.6 131.1 1.0X -ISODOW of timestamp wholestage on 1307 1314 6 7.7 130.7 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DOY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -DOY of timestamp wholestage off 1241 1243 2 8.1 124.1 1.0X -DOY of timestamp wholestage on 1229 1239 9 8.1 122.9 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -HOUR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -HOUR of timestamp wholestage off 353 358 8 28.3 35.3 1.0X -HOUR of timestamp wholestage on 358 365 5 27.9 35.8 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MINUTE of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -MINUTE of timestamp wholestage off 353 354 2 28.3 35.3 1.0X -MINUTE of timestamp wholestage on 362 368 9 27.6 36.2 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SECOND 
of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -SECOND of timestamp wholestage off 341 350 13 29.3 34.1 1.0X -SECOND of timestamp wholestage on 362 368 7 27.6 36.2 0.9X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MILLISECONDS of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -MILLISECONDS of timestamp wholestage off 36785 36808 32 0.3 3678.5 1.0X -MILLISECONDS of timestamp wholestage on 36644 36760 72 0.3 3664.4 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MICROSECONDS of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -MICROSECONDS of timestamp wholestage off 446 447 0 22.4 44.6 1.0X -MICROSECONDS of timestamp wholestage on 458 463 4 21.8 45.8 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -EPOCH of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -EPOCH of timestamp wholestage off 29807 29811 5 0.3 2980.7 1.0X -EPOCH of timestamp wholestage on 29843 29930 64 0.3 2984.3 1.0X - +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +cast to timestamp 264 281 25 37.9 26.4 1.0X +MILLENNIUM of timestamp 1187 1198 16 8.4 118.7 0.2X +CENTURY of timestamp 1122 1124 2 8.9 112.2 0.2X +DECADE of timestamp 1041 1049 7 9.6 104.1 0.3X +YEAR of timestamp 1027 1035 6 9.7 102.7 0.3X +ISOYEAR of timestamp 1155 1171 27 8.7 115.5 0.2X +QUARTER of timestamp 1181 1192 10 8.5 118.1 0.2X +MONTH of timestamp 1023 1030 7 9.8 102.3 0.3X +WEEK of timestamp 1511 1517 5 6.6 151.1 0.2X +DAY of timestamp 1010 1016 6 9.9 101.0 0.3X +DAYOFWEEK of timestamp 1127 1129 4 8.9 112.7 0.2X +DOW of timestamp 1123 1130 6 8.9 112.3 0.2X +ISODOW of timestamp 1099 1105 6 9.1 109.9 0.2X +DOY of timestamp 1029 1030 1 9.7 102.9 0.3X +HOUR of timestamp 415 417 1 24.1 41.5 0.6X +MINUTE of timestamp 409 418 13 24.4 40.9 0.6X +SECOND of timestamp 408 413 8 24.5 40.8 0.6X +MILLISECONDS of timestamp 28956 29040 73 0.3 2895.6 0.0X +MICROSECONDS of timestamp 504 519 13 19.8 50.4 0.5X +EPOCH of timestamp 23543 23566 28 0.4 2354.3 0.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to date 853 856 6 11.7 85.3 1.0X +MILLENNIUM of date 1081 1088 6 9.2 108.1 0.8X +CENTURY of date 1087 1090 4 9.2 108.7 0.8X +DECADE of date 1018 1019 1 9.8 101.8 0.8X +YEAR of date 996 1006 9 10.0 99.6 0.9X +ISOYEAR of date 1133 1147 21 8.8 113.3 0.8X +QUARTER of date 1246 1254 10 8.0 124.6 0.7X +MONTH of date 998 1002 4 10.0 99.8 0.9X +WEEK of date 1483 1490 7 6.7 148.3 0.6X +DAY of date 992 998 5 10.1 99.2 0.9X +DAYOFWEEK of date 1121 1128 7 8.9 112.1 0.8X +DOW of date 1118 1126 8 8.9 111.8 0.8X +ISODOW of date 1093 1103 9 9.1 109.3 0.8X +DOY of date 1026 1032 5 9.7 
102.6 0.8X +HOUR of date 1707 1726 24 5.9 170.7 0.5X +MINUTE of date 1710 1731 19 5.8 171.0 0.5X +SECOND of date 1701 1720 19 5.9 170.1 0.5X +MILLISECONDS of date 2256 2272 19 4.4 225.6 0.4X +MICROSECONDS of date 1801 1810 11 5.6 180.1 0.5X +EPOCH of date 24848 24860 17 0.4 2484.8 0.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to timestamp 227 253 39 44.0 22.7 1.0X +MILLENNIUM of timestamp 1121 1127 9 8.9 112.1 0.2X +CENTURY of timestamp 1084 1094 17 9.2 108.4 0.2X +DECADE of timestamp 1015 1037 35 9.8 101.5 0.2X +YEAR of timestamp 1011 1013 4 9.9 101.1 0.2X +ISOYEAR of timestamp 1121 1126 7 8.9 112.1 0.2X +QUARTER of timestamp 1243 1247 4 8.0 124.3 0.2X +MONTH of timestamp 1000 1009 14 10.0 100.0 0.2X +WEEK of timestamp 1481 1492 11 6.8 148.1 0.2X +DAY of timestamp 999 1000 1 10.0 99.9 0.2X +DAYOFWEEK of timestamp 1114 1132 20 9.0 111.4 0.2X +DOW of timestamp 1135 1147 17 8.8 113.5 0.2X +ISODOW of timestamp 1070 1079 10 9.3 107.0 0.2X +DOY of timestamp 1018 1027 8 9.8 101.8 0.2X +HOUR of timestamp 413 419 9 24.2 41.3 0.6X +MINUTE of timestamp 411 414 3 24.3 41.1 0.6X +SECOND of timestamp 410 413 3 24.4 41.0 0.6X +MILLISECONDS of timestamp 29225 29264 52 0.3 2922.5 0.0X +MICROSECONDS of timestamp 507 512 7 19.7 50.7 0.4X +EPOCH of timestamp 23565 23608 56 0.4 2356.5 0.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to date 848 852 3 11.8 84.8 1.0X +MILLENNIUM of date 1083 1086 3 9.2 
108.3 0.8X +CENTURY of date 1081 1084 5 9.3 108.1 0.8X +DECADE of date 1010 1014 4 9.9 101.0 0.8X +YEAR of date 992 1000 9 10.1 99.2 0.9X +ISOYEAR of date 1111 1116 6 9.0 111.1 0.8X +QUARTER of date 1230 1249 17 8.1 123.0 0.7X +MONTH of date 996 1008 11 10.0 99.6 0.9X +WEEK of date 1487 1516 28 6.7 148.7 0.6X +DAY of date 993 999 6 10.1 99.3 0.9X +DAYOFWEEK of date 1113 1118 5 9.0 111.3 0.8X +DOW of date 1113 1116 2 9.0 111.3 0.8X +ISODOW of date 1069 1072 3 9.4 106.9 0.8X +DOY of date 1027 1028 1 9.7 102.7 0.8X +HOUR of date 1707 1710 3 5.9 170.7 0.5X +MINUTE of date 1704 1705 2 5.9 170.4 0.5X +SECOND of date 1701 1705 4 5.9 170.1 0.5X +MILLISECONDS of date 2229 2238 9 4.5 222.9 0.4X +MICROSECONDS of date 1801 1808 12 5.6 180.1 0.5X +EPOCH of date 24783 24817 31 0.4 2478.3 0.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala index dbbad43efa08..b9086aa5cb37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala @@ -19,6 +19,11 @@ package org.apache.spark.sql.execution.benchmark import java.time.Instant +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf + /** * Synthetic benchmark for the extract function. * To run this benchmark: @@ -31,52 +36,76 @@ import java.time.Instant * Results will be written to "benchmarks/ExtractBenchmark-results.txt". 
* }}}
  */
-object ExtractBenchmark extends SqlBasedBenchmark {
+object ExtractBenchmark extends BenchmarkBase with SQLHelper {
+  // Session created with a "local[1]" master; every benchmark case below runs against it.
+  private val spark: SparkSession = SparkSession.builder()
+    .master("local[1]")
+    .appName(this.getClass.getCanonicalName)
+    .getOrCreate()
+
+  /**
+   * Evaluates `exprs` over the ids from `sinceSecond` up to `sinceSecond + cardinality`,
+   * where `sinceSecond` is the epoch second of 2010-01-01T00:00:00Z, and saves the result
+   * through the "noop" format. The query runs inside `withSQLConf` with whole-stage codegen
+   * set to "true".
+   */
   private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
     val sinceSecond = Instant.parse("2010-01-01T00:00:00Z").getEpochSecond
-    spark
-      .range(sinceSecond, sinceSecond + cardinality, 1, 1)
-      .selectExpr(exprs: _*)
-      .write
-      .format("noop")
-      .save()
+    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
+      spark
+        .range(sinceSecond, sinceSecond + cardinality, 1, 1)
+        .selectExpr(exprs: _*)
+        .write
+        .format("noop")
+        .save()
+    }
   }
 
-  private def run(cardinality: Long, name: String, exprs: String*): Unit = {
-    codegenBenchmark(name, cardinality) {
+  /** Registers a case called `name` on `benchmark` that evaluates the raw SQL `exprs`. */
+  private def run(
+      benchmark: Benchmark,
+      cardinality: Long,
+      name: String,
+      exprs: String*): Unit = {
+    benchmark.addCase(name, numIters = 3) { _ =>
       doBenchmark(cardinality, exprs: _*)
     }
   }
 
-  private def run(cardinality: Long, field: String): Unit = {
-    codegenBenchmark(s"$field of timestamp", cardinality) {
-      doBenchmark(cardinality, s"EXTRACT($field FROM (cast(id as timestamp)))")
+  /**
+   * Returns the SQL expression that casts the `id` column to the column type `from`.
+   *
+   * @throws IllegalArgumentException if `from` is neither "timestamp" nor "date"
+   */
+  private def castExpr(from: String): String = from match {
+    // Plain literals: nothing is interpolated here, so no `s` prefix is needed.
+    case "timestamp" => "cast(id as timestamp)"
+    case "date" => "cast(cast(id as timestamp) as date)"
+    case other => throw new IllegalArgumentException(
+      s"Unsupported column type '$other'. Valid column types are 'timestamp' and 'date'.")
+  }
+
+  /**
+   * Registers a case named "`field` of `from`" on `benchmark` that extracts `field` from the
+   * `id` column cast to `from`, using either EXTRACT or DATE_PART.
+   *
+   * @param func either "extract" or "date_part"
+   * @throws IllegalArgumentException if `func` is not one of the two supported functions,
+   *                                  or if `from` is rejected by `castExpr`
+   */
+  private def run(
+      benchmark: Benchmark,
+      func: String,
+      cardinality: Long,
+      field: String,
+      from: String): Unit = {
+    val expr = func match {
+      case "extract" => s"EXTRACT($field FROM ${castExpr(from)})"
+      case "date_part" => s"DATE_PART('$field', ${castExpr(from)})"
+      case other => throw new IllegalArgumentException(
+        s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.")
+    }
+    benchmark.addCase(s"$field of $from", numIters = 3) { _ =>
+      doBenchmark(cardinality, expr)
     }
   }
 
+  /**
+   * Builds and runs one [[Benchmark]] for each combination of function ("extract",
+   * "date_part") and column type ("timestamp", "date"). Each benchmark starts with a case
+   * for the plain cast, followed by one case per field.
+   */
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     val N = 10000000L
-    runBenchmark("Extract") {
-      run(N, "cast to timestamp", "cast(id as timestamp)")
-      run(N, "MILLENNIUM")
-      run(N, "CENTURY")
-      run(N, "DECADE")
-      run(N, "YEAR")
-      run(N, "ISOYEAR")
-      run(N, "QUARTER")
-      run(N, "MONTH")
-      run(N, "WEEK")
-      run(N, "DAY")
-      run(N, "DAYOFWEEK")
-      run(N, "DOW")
-      run(N, "ISODOW")
-      run(N, "DOY")
-      run(N, "HOUR")
-      run(N, "MINUTE")
-      run(N, "SECOND")
-      run(N, "MILLISECONDS")
-      run(N, "MICROSECONDS")
-      run(N, "EPOCH")
+    val fields = Seq(
+      "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
+      "ISOYEAR", "QUARTER", "MONTH", "WEEK",
+      "DAY", "DAYOFWEEK", "DOW", "ISODOW",
+      "DOY", "HOUR", "MINUTE", "SECOND",
+      "MILLISECONDS", "MICROSECONDS", "EPOCH")
+
+    Seq("extract", "date_part").foreach { func =>
+      Seq("timestamp", "date").foreach { dateType =>
+        val benchmark = new Benchmark(s"Invoke $func for $dateType", N, output = output)
+
+        // The plain cast is registered first, ahead of the per-field cases.
+        run(benchmark, N, s"cast to $dateType", castExpr(dateType))
+        fields.foreach(run(benchmark, func, N, _, dateType))
+
+        benchmark.run()
+      }
     }
   }
 }