diff --git a/external/avro/benchmarks/AvroReadBenchmark-results.txt b/external/avro/benchmarks/AvroReadBenchmark-results.txt index 7900fea453b1..8a12d9d15d91 100644 --- a/external/avro/benchmarks/AvroReadBenchmark-results.txt +++ b/external/avro/benchmarks/AvroReadBenchmark-results.txt @@ -2,121 +2,121 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2774 / 2815 5.7 176.4 1.0X +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2958 3072 161 5.3 188.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2761 / 2777 5.7 175.5 1.0X +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2856 2866 15 5.5 181.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2783 / 2870 5.7 176.9 1.0X +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2883 2890 10 5.5 183.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 3256 / 3266 4.8 207.0 1.0X +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 3336 3360 34 4.7 212.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2841 / 2867 5.5 180.6 1.0X +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2728 2760 45 5.8 173.5 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2981 / 2996 5.3 189.5 1.0X +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2826 2833 9 5.6 179.7 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of columns 4781 / 4783 2.2 456.0 1.0X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of columns 4682 4704 30 2.2 446.6 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column 3372 / 3386 4.7 214.4 1.0X -Partition column 3035 / 3064 5.2 193.0 1.1X -Both columns 3445 / 3461 4.6 219.1 1.0X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column 3206 3265 84 4.9 203.8 1.0X +Partition column 3314 3316 3 4.7 210.7 1.0X +Both columns 3353 3367 20 4.7 213.2 1.0X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 3395 / 3401 3.1 323.8 1.0X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3364 3367 4 3.1 320.8 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 5580 / 5624 1.9 532.2 1.0X +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 5403 5410 10 1.9 515.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 4622 / 4623 2.3 440.8 1.0X +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 4524 4526 2 2.3 431.5 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 3238 / 3241 3.2 308.8 1.0X +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3121 3129 11 3.4 297.7 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 5472 / 5484 0.2 5218.8 1.0X +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 4537 4541 6 0.2 4326.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 10680 / 10701 0.1 10185.1 1.0X +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 8768 8776 11 0.1 8361.8 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 16143 / 16238 0.1 15394.9 1.0X +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 13042 13064 31 0.1 12437.6 1.0X diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala index f2f7d650066f..a16126ae2424 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala @@ -22,7 +22,6 @@ import scala.util.Random import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.types._ /** @@ -36,7 +35,7 @@ import org.apache.spark.sql.types._ * Results will be written to "benchmarks/AvroReadBenchmark-results.txt". * }}} */ -object AvroReadBenchmark extends SqlBasedBenchmark with SQLHelper { +object AvroReadBenchmark extends SqlBasedBenchmark { def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) } diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index f547f61654b5..efb0601cc5d6 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -2,251 +2,251 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 26366 / 26562 0.6 1676.3 1.0X -SQL Json 8709 / 8724 1.8 553.7 3.0X -SQL Parquet Vectorized 166 / 187 94.8 10.5 159.0X -SQL Parquet MR 1706 / 1720 9.2 108.4 15.5X -SQL ORC Vectorized 167 / 174 94.2 10.6 157.9X -SQL ORC MR 1433 / 1465 11.0 91.1 18.4X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 200 / 207 78.7 12.7 1.0X -ParquetReader Vectorized -> Row 117 / 119 134.7 7.4 1.7X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 26489 / 26547 0.6 1684.1 1.0X -SQL Json 8990 / 8998 1.7 571.5 2.9X -SQL Parquet Vectorized 209 / 221 75.1 13.3 126.5X -SQL Parquet MR 1949 / 1949 8.1 123.9 13.6X -SQL ORC Vectorized 221 / 228 71.3 14.0 120.1X -SQL ORC MR 1527 / 1549 10.3 97.1 17.3X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 286 / 296 54.9 18.2 1.0X -ParquetReader Vectorized -> Row 249 / 253 63.1 15.8 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 27701 / 27744 0.6 1761.2 1.0X -SQL Json 9703 / 9733 1.6 616.9 2.9X -SQL Parquet Vectorized 176 / 182 89.2 11.2 157.0X -SQL Parquet MR 2164 / 2173 7.3 137.6 12.8X -SQL ORC Vectorized 307 / 314 51.2 19.5 90.2X -SQL ORC MR 1690 / 1700 9.3 107.4 16.4X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 259 / 277 60.7 16.5 1.0X -ParquetReader Vectorized -> Row 261 / 265 60.3 16.6 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 34813 / 34900 0.5 2213.3 1.0X -SQL Json 12570 / 12617 1.3 799.2 2.8X -SQL Parquet Vectorized 270 / 308 58.2 17.2 128.9X -SQL Parquet MR 2427 / 2431 6.5 154.3 14.3X -SQL ORC Vectorized 388 / 398 40.6 24.6 89.8X -SQL ORC MR 1819 / 1851 8.6 115.7 19.1X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 372 / 379 42.3 23.7 1.0X -ParquetReader Vectorized -> Row 357 / 368 44.1 22.7 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 28753 / 28781 0.5 1828.0 1.0X -SQL Json 12039 / 12215 1.3 765.4 2.4X -SQL Parquet Vectorized 170 / 177 92.4 10.8 169.0X -SQL Parquet MR 2184 / 2196 7.2 138.9 13.2X -SQL ORC Vectorized 432 / 440 36.4 27.5 66.5X -SQL ORC MR 1812 / 1833 8.7 115.2 15.9X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 253 / 260 62.2 16.1 1.0X -ParquetReader Vectorized -> Row 256 / 257 61.6 16.2 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 36177 / 36188 0.4 2300.1 1.0X -SQL Json 18895 / 18898 0.8 1201.3 1.9X -SQL Parquet Vectorized 267 / 276 58.9 17.0 135.6X -SQL Parquet MR 2355 / 2363 6.7 149.7 15.4X -SQL ORC Vectorized 543 / 546 29.0 34.5 66.6X -SQL ORC MR 2246 / 2258 7.0 142.8 16.1X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Parquet Reader Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 353 / 367 44.6 22.4 1.0X -ParquetReader Vectorized -> Row 351 / 357 44.7 22.3 1.0X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 23939 24126 265 0.7 1522.0 1.0X +SQL Json 8908 9008 142 1.8 566.4 2.7X +SQL Parquet Vectorized 192 229 36 82.1 12.2 125.0X +SQL Parquet MR 2356 2363 10 6.7 149.8 10.2X +SQL ORC Vectorized 329 347 25 47.9 20.9 72.9X +SQL ORC MR 1711 1747 50 9.2 108.8 14.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 194 197 4 81.1 12.3 1.0X +ParquetReader Vectorized -> Row 97 102 13 162.3 6.2 2.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 24603 24607 6 0.6 1564.2 1.0X +SQL Json 9587 9652 92 1.6 609.5 2.6X +SQL Parquet Vectorized 227 241 13 69.4 14.4 108.6X +SQL Parquet MR 2432 2441 12 6.5 154.6 10.1X +SQL ORC Vectorized 320 327 8 49.2 20.3 76.9X +SQL ORC MR 1889 1921 46 8.3 120.1 13.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 290 294 8 54.3 18.4 1.0X +ParquetReader Vectorized -> Row 252 256 5 62.4 16.0 1.2X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 26742 26743 1 0.6 1700.2 1.0X +SQL Json 10855 10855 0 1.4 690.1 2.5X +SQL Parquet Vectorized 195 202 7 80.8 12.4 137.3X +SQL Parquet MR 2805 2806 0 5.6 178.4 9.5X +SQL ORC Vectorized 376 383 5 41.8 23.9 71.1X +SQL ORC MR 2021 2092 102 7.8 128.5 13.2X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 248 253 5 63.4 15.8 1.0X +ParquetReader Vectorized -> Row 249 251 2 63.1 15.9 1.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 34841 34855 20 0.5 2215.1 1.0X +SQL Json 14121 14133 18 1.1 897.8 2.5X +SQL Parquet Vectorized 288 303 17 54.7 18.3 121.2X +SQL Parquet MR 3178 3197 27 4.9 202.0 11.0X +SQL ORC Vectorized 465 476 8 33.8 29.6 74.9X +SQL ORC MR 2255 2260 6 7.0 143.4 15.4X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 344 354 11 45.8 21.8 1.0X +ParquetReader Vectorized -> Row 383 385 3 41.1 24.3 0.9X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 29336 29563 322 0.5 1865.1 1.0X +SQL Json 13452 13544 130 1.2 855.3 2.2X +SQL Parquet Vectorized 186 200 22 84.8 11.8 158.1X +SQL Parquet MR 2752 2815 90 5.7 175.0 10.7X +SQL ORC Vectorized 460 465 6 34.2 29.3 63.7X +SQL ORC MR 2054 2072 26 7.7 130.6 14.3X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 244 246 4 64.6 15.5 1.0X +ParquetReader Vectorized -> Row 247 250 4 63.7 15.7 1.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 37812 37897 120 0.4 2404.0 1.0X +SQL Json 19499 19509 15 0.8 1239.7 1.9X +SQL Parquet Vectorized 284 292 10 55.4 18.1 133.2X +SQL Parquet MR 3236 3248 17 4.9 205.7 11.7X +SQL ORC Vectorized 542 558 18 29.0 34.4 69.8X +SQL ORC MR 2273 2298 36 6.9 144.5 16.6X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +ParquetReader Vectorized 342 352 13 46.0 21.7 1.0X +ParquetReader Vectorized -> Row 341 344 3 46.1 21.7 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 21130 / 21246 0.5 2015.1 1.0X -SQL Json 12145 / 12174 0.9 1158.2 1.7X -SQL Parquet Vectorized 2363 / 2377 4.4 225.3 8.9X -SQL Parquet MR 4555 / 4557 2.3 434.4 4.6X -SQL ORC Vectorized 2361 / 2388 4.4 225.1 9.0X -SQL ORC MR 4186 / 4209 2.5 399.2 5.0X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 26777 26806 41 0.4 2553.7 1.0X +SQL Json 13894 14071 251 0.8 1325.0 1.9X +SQL Parquet Vectorized 2351 2404 75 4.5 224.2 11.4X +SQL Parquet MR 5198 5219 29 2.0 495.8 5.2X +SQL ORC Vectorized 2434 2435 1 4.3 232.1 11.0X +SQL ORC MR 4281 4345 91 2.4 408.3 6.3X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 11693 / 11729 0.9 1115.1 1.0X -SQL Json 7025 / 7025 1.5 669.9 1.7X -SQL Parquet Vectorized 803 / 821 13.1 76.6 14.6X -SQL Parquet MR 1776 / 1790 5.9 169.4 6.6X -SQL ORC Vectorized 491 / 494 21.4 46.8 23.8X -SQL ORC MR 2050 / 2063 5.1 195.5 5.7X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 15779 16507 1029 0.7 1504.8 1.0X +SQL Json 7866 7877 14 1.3 750.2 2.0X +SQL Parquet Vectorized 820 826 5 12.8 78.2 19.2X +SQL Parquet MR 2646 2658 17 4.0 252.4 6.0X +SQL ORC Vectorized 638 644 7 16.4 60.9 24.7X +SQL ORC MR 2205 2222 25 4.8 210.3 7.2X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column - CSV 30965 / 31041 0.5 1968.7 1.0X -Data column - Json 12876 / 12882 1.2 818.6 2.4X -Data column - Parquet Vectorized 277 / 282 56.7 17.6 111.6X -Data column - Parquet MR 3398 / 3402 4.6 216.0 9.1X -Data column - ORC Vectorized 399 / 407 39.4 25.4 77.5X -Data column - ORC MR 2583 / 2589 6.1 164.2 12.0X -Partition column - CSV 7403 / 7427 2.1 470.7 4.2X -Partition column - Json 5587 / 5625 2.8 355.2 5.5X -Partition column - Parquet Vectorized 71 / 78 222.6 4.5 438.3X -Partition column - Parquet MR 1798 / 1808 8.7 114.3 17.2X -Partition column - ORC Vectorized 72 / 75 219.0 4.6 431.2X -Partition column - ORC MR 1772 / 1778 8.9 112.6 17.5X -Both columns - CSV 30211 / 30212 0.5 1920.7 1.0X -Both columns - Json 13382 / 13391 1.2 850.8 2.3X -Both columns - Parquet Vectorized 321 / 333 49.0 20.4 96.4X -Both columns - Parquet MR 3656 / 3661 4.3 232.4 8.5X -Both columns - ORC Vectorized 443 / 448 35.5 28.2 69.9X -Both columns - ORC MR 2626 / 2633 6.0 167.0 11.8X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column - CSV 38142 38183 58 0.4 2425.0 1.0X +Data column - Json 14664 14667 4 1.1 932.3 2.6X +Data column - Parquet Vectorized 304 318 13 51.8 19.3 125.7X +Data column - Parquet MR 3378 3384 8 4.7 214.8 11.3X +Data column - ORC Vectorized 475 481 7 33.1 30.2 80.3X +Data column - ORC MR 2324 2356 46 6.8 147.7 16.4X +Partition column - CSV 14680 14742 88 1.1 933.3 2.6X +Partition column - Json 11200 11251 73 1.4 712.1 3.4X +Partition column - Parquet Vectorized 102 111 14 154.7 6.5 375.1X +Partition column - Parquet MR 1477 1483 9 10.7 93.9 25.8X +Partition column - ORC Vectorized 100 112 18 157.4 6.4 381.6X +Partition column - ORC MR 1675 1685 15 9.4 106.5 22.8X +Both columns - CSV 41925 41929 6 0.4 2665.5 0.9X +Both columns - Json 15409 15422 18 1.0 979.7 2.5X +Both columns - Parquet Vectorized 351 358 10 44.8 22.3 108.7X +Both columns - Parquet MR 3719 3720 2 4.2 236.4 10.3X +Both columns - ORC Vectorized 609 630 23 25.8 38.7 62.6X +Both columns - ORC MR 2959 2959 1 5.3 188.1 12.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 13918 / 13979 0.8 1327.3 1.0X -SQL Json 10068 / 10068 1.0 960.1 1.4X -SQL Parquet Vectorized 1563 / 1564 6.7 149.0 8.9X -SQL Parquet MR 3835 / 3836 2.7 365.8 3.6X -ParquetReader Vectorized 1115 / 1118 9.4 106.4 12.5X -SQL ORC Vectorized 1172 / 1208 8.9 111.8 11.9X -SQL ORC MR 3708 / 3711 2.8 353.6 3.8X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 13972 / 14043 0.8 1332.5 1.0X -SQL Json 7436 / 7469 1.4 709.1 1.9X -SQL Parquet Vectorized 1103 / 1112 9.5 105.2 12.7X -SQL Parquet MR 2841 / 2847 3.7 271.0 4.9X -ParquetReader Vectorized 992 / 1012 10.6 94.6 14.1X -SQL ORC Vectorized 1275 / 1349 8.2 121.6 11.0X -SQL ORC MR 3244 / 3259 3.2 309.3 4.3X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 11228 / 11244 0.9 1070.8 1.0X -SQL Json 5200 / 5247 2.0 495.9 2.2X -SQL Parquet Vectorized 238 / 242 44.1 22.7 47.2X -SQL Parquet MR 1730 / 1734 6.1 165.0 6.5X -ParquetReader Vectorized 237 / 238 44.3 22.6 47.4X -SQL ORC Vectorized 459 / 462 22.8 43.8 24.4X -SQL ORC MR 1767 / 1783 5.9 168.5 6.4X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 19510 19709 282 0.5 1860.6 1.0X +SQL Json 11816 11822 8 0.9 1126.9 1.7X +SQL Parquet Vectorized 1535 1548 18 6.8 146.4 12.7X +SQL Parquet MR 5491 5514 33 1.9 523.6 3.6X +ParquetReader Vectorized 1126 1129 5 9.3 107.4 17.3X +SQL ORC Vectorized 1200 1215 21 8.7 114.5 16.3X +SQL ORC MR 3901 3904 4 2.7 372.1 5.0X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 21439 21457 26 0.5 2044.6 1.0X +SQL Json 9653 9669 22 1.1 920.6 2.2X +SQL Parquet Vectorized 1126 1131 8 9.3 107.4 19.0X +SQL Parquet MR 3947 3961 19 2.7 376.4 5.4X +ParquetReader Vectorized 998 1023 36 10.5 95.2 21.5X +SQL ORC Vectorized 1274 1277 4 8.2 121.5 16.8X +SQL ORC MR 3424 3425 1 3.1 326.5 6.3X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 17885 17893 11 0.6 1705.7 1.0X +SQL Json 5201 5210 13 2.0 496.0 3.4X +SQL Parquet Vectorized 261 267 6 40.2 24.9 68.6X +SQL Parquet MR 2841 2853 18 3.7 270.9 6.3X +ParquetReader Vectorized 244 246 3 43.1 23.2 73.4X +SQL ORC Vectorized 465 468 1 22.5 44.4 38.4X +SQL ORC MR 1904 1945 58 5.5 181.6 9.4X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 10 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 3322 / 3356 0.3 3167.9 1.0X -SQL Json 2808 / 2843 0.4 2678.2 1.2X -SQL Parquet Vectorized 56 / 63 18.9 52.9 59.8X -SQL Parquet MR 215 / 219 4.9 205.4 15.4X -SQL ORC Vectorized 64 / 76 16.4 60.9 52.0X -SQL ORC MR 314 / 316 3.3 299.6 10.6X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 50 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 7978 / 7989 0.1 7608.5 1.0X -SQL Json 10294 / 10325 0.1 9816.9 0.8X -SQL Parquet Vectorized 72 / 85 14.5 69.0 110.3X -SQL Parquet MR 237 / 241 4.4 226.4 33.6X -SQL ORC Vectorized 82 / 92 12.7 78.5 97.0X -SQL ORC MR 900 / 909 1.2 858.5 8.9X - -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -SQL CSV 13489 / 13508 0.1 12864.3 1.0X -SQL Json 18813 / 18827 0.1 17941.4 0.7X -SQL Parquet Vectorized 107 / 111 9.8 101.8 126.3X -SQL Parquet MR 275 / 286 3.8 262.3 49.0X -SQL ORC Vectorized 107 / 115 9.8 101.7 126.4X -SQL ORC MR 1659 / 1664 0.6 1582.3 8.1X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 3841 3861 28 0.3 3663.1 1.0X +SQL Json 3780 3787 10 0.3 3604.6 1.0X +SQL Parquet Vectorized 83 90 10 12.7 79.0 46.4X +SQL Parquet MR 291 303 18 3.6 277.9 13.2X +SQL ORC Vectorized 93 106 20 11.3 88.8 41.2X +SQL ORC MR 217 224 10 4.8 206.6 17.7X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 8896 8971 106 0.1 8483.9 1.0X +SQL Json 14731 14773 59 0.1 14048.2 0.6X +SQL Parquet Vectorized 120 146 26 8.8 114.0 74.4X +SQL Parquet MR 330 363 33 3.2 314.4 27.0X +SQL ORC Vectorized 122 130 11 8.6 115.9 73.2X +SQL ORC MR 248 254 9 4.2 237.0 35.8X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SQL CSV 14771 14817 65 0.1 14086.3 1.0X +SQL Json 29677 29787 157 0.0 28302.0 0.5X +SQL Parquet Vectorized 182 191 13 5.8 173.8 81.1X +SQL Parquet MR 1209 1213 5 0.9 1153.1 12.2X +SQL ORC Vectorized 165 176 17 6.3 157.7 89.3X +SQL ORC MR 809 813 4 1.3 771.4 18.3X diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index 31ad7878ee64..6c4bdfed703f 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,100 +1,100 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 264 281 25 37.9 26.4 1.0X -MILLENNIUM of timestamp 1187 1198 16 8.4 118.7 0.2X -CENTURY of timestamp 1122 1124 2 8.9 112.2 0.2X -DECADE of timestamp 1041 1049 7 9.6 104.1 0.3X -YEAR of timestamp 1027 1035 6 9.7 102.7 0.3X -ISOYEAR of timestamp 1155 1171 27 8.7 115.5 0.2X -QUARTER of timestamp 1181 1192 10 8.5 118.1 0.2X -MONTH of timestamp 1023 1030 7 9.8 102.3 0.3X -WEEK of timestamp 1511 1517 5 6.6 151.1 0.2X -DAY of timestamp 1010 1016 6 9.9 101.0 0.3X -DAYOFWEEK of timestamp 1127 1129 4 8.9 112.7 0.2X -DOW of timestamp 1123 1130 6 8.9 112.3 0.2X -ISODOW of timestamp 1099 1105 6 9.1 109.9 0.2X -DOY of timestamp 1029 1030 1 9.7 102.9 0.3X -HOUR of timestamp 415 417 1 24.1 41.5 0.6X -MINUTE of timestamp 409 418 13 24.4 40.9 0.6X -SECOND of timestamp 408 413 8 24.5 40.8 0.6X -MILLISECONDS of timestamp 28956 29040 73 0.3 2895.6 0.0X -MICROSECONDS of timestamp 504 519 13 19.8 50.4 0.5X -EPOCH of timestamp 23543 23566 28 0.4 2354.3 0.0X +cast to timestamp 397 428 45 25.2 39.7 1.0X +MILLENNIUM of timestamp 1480 1546 67 6.8 148.0 0.3X +CENTURY of timestamp 1368 1384 17 7.3 136.8 0.3X +DECADE of timestamp 1281 1344 57 7.8 128.1 0.3X +YEAR of timestamp 1238 1244 5 8.1 123.8 0.3X +ISOYEAR of timestamp 1379 1455 122 7.2 137.9 0.3X +QUARTER of timestamp 1442 1456 15 6.9 144.2 0.3X +MONTH of timestamp 1213 1217 3 8.2 121.3 0.3X +WEEK of timestamp 1927 1947 22 5.2 192.7 0.2X +DAY of timestamp 1306 1320 16 7.7 130.6 0.3X +DAYOFWEEK of timestamp 1394 1402 11 7.2 139.4 0.3X +DOW of timestamp 1367 1374 6 7.3 136.7 0.3X +ISODOW of timestamp 1317 1321 5 7.6 131.7 0.3X +DOY of timestamp 1223 1238 14 8.2 122.3 0.3X +HOUR of timestamp 361 362 2 27.7 36.1 1.1X +MINUTE of timestamp 354 362 10 28.3 35.4 1.1X +SECOND of timestamp 362 365 4 27.6 36.2 1.1X +MILLISECONDS of timestamp 36723 36761 63 0.3 3672.3 0.0X +MICROSECONDS of timestamp 469 490 29 21.3 46.9 0.8X +EPOCH of timestamp 30137 30181 38 0.3 3013.7 0.0X -Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 853 856 6 11.7 85.3 1.0X -MILLENNIUM of date 1081 1088 6 9.2 108.1 0.8X -CENTURY of date 1087 1090 4 9.2 108.7 0.8X -DECADE of date 1018 1019 1 9.8 101.8 0.8X -YEAR of date 996 1006 9 10.0 99.6 0.9X -ISOYEAR of date 1133 1147 21 8.8 113.3 0.8X -QUARTER of date 1246 1254 10 8.0 124.6 0.7X -MONTH of date 998 1002 4 10.0 99.8 0.9X -WEEK of date 1483 1490 7 6.7 148.3 0.6X -DAY of date 992 998 5 10.1 99.2 0.9X -DAYOFWEEK of date 1121 1128 7 8.9 112.1 0.8X -DOW of date 1118 1126 8 8.9 111.8 0.8X -ISODOW of date 1093 1103 9 9.1 109.3 0.8X -DOY of date 1026 1032 5 9.7 102.6 0.8X -HOUR of date 1707 1726 24 5.9 170.7 0.5X -MINUTE of date 1710 1731 19 5.8 171.0 0.5X -SECOND of date 1701 1720 19 5.9 170.1 0.5X -MILLISECONDS of date 2256 2272 19 4.4 225.6 0.4X -MICROSECONDS of date 1801 1810 11 5.6 180.1 0.5X -EPOCH of date 24848 24860 17 0.4 2484.8 0.0X +cast to date 1010 1022 11 9.9 101.0 1.0X +MILLENNIUM of date 1300 1311 18 7.7 130.0 0.8X +CENTURY of date 1304 1306 2 7.7 130.4 0.8X +DECADE of date 1199 1205 10 8.3 119.9 0.8X +YEAR of date 1191 1194 4 8.4 119.1 0.8X +ISOYEAR of date 1451 1456 9 6.9 145.1 0.7X +QUARTER of date 1494 1501 10 6.7 149.4 0.7X +MONTH of date 1189 1191 3 8.4 118.9 0.8X +WEEK of date 1893 1958 111 5.3 189.3 0.5X +DAY of date 1282 1285 3 7.8 128.2 0.8X +DAYOFWEEK of date 1374 1386 17 7.3 137.4 0.7X +DOW of date 1348 1351 3 7.4 134.8 0.7X +ISODOW of date 1292 1297 5 7.7 129.2 0.8X +DOY of date 1213 1216 3 8.2 121.3 0.8X +HOUR of date 1450 1458 9 6.9 145.0 0.7X +MINUTE of date 1445 1452 9 6.9 144.5 0.7X +SECOND of date 1448 1458 8 6.9 144.8 0.7X +MILLISECONDS of date 2094 2103 11 4.8 209.4 0.5X +MICROSECONDS of date 1562 1573 19 6.4 156.2 0.6X +EPOCH of date 31000 31047 68 0.3 3100.0 0.0X -Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 227 253 39 44.0 22.7 1.0X -MILLENNIUM of timestamp 1121 1127 9 8.9 112.1 0.2X -CENTURY of timestamp 1084 1094 17 9.2 108.4 0.2X -DECADE of timestamp 1015 1037 35 9.8 101.5 0.2X -YEAR of timestamp 1011 1013 4 9.9 101.1 0.2X -ISOYEAR of timestamp 1121 1126 7 8.9 112.1 0.2X -QUARTER of timestamp 1243 1247 4 8.0 124.3 0.2X -MONTH of timestamp 1000 1009 14 10.0 100.0 0.2X -WEEK of timestamp 1481 1492 11 6.8 148.1 0.2X -DAY of timestamp 999 1000 1 10.0 99.9 0.2X -DAYOFWEEK of timestamp 1114 1132 20 9.0 111.4 0.2X -DOW of timestamp 1135 1147 17 8.8 113.5 0.2X -ISODOW of timestamp 1070 1079 10 9.3 107.0 0.2X -DOY of timestamp 1018 1027 8 9.8 101.8 0.2X -HOUR of timestamp 413 419 9 24.2 41.3 0.6X -MINUTE of timestamp 411 414 3 24.3 41.1 0.6X -SECOND of timestamp 410 413 3 24.4 41.0 0.6X -MILLISECONDS of timestamp 29225 29264 52 0.3 2922.5 0.0X -MICROSECONDS of timestamp 507 512 7 19.7 50.7 0.4X -EPOCH of timestamp 23565 23608 56 0.4 2356.5 0.0X +cast to timestamp 327 333 10 30.6 32.7 1.0X +MILLENNIUM of timestamp 1292 1296 4 7.7 129.2 0.3X +CENTURY of timestamp 1301 1305 6 7.7 130.1 0.3X +DECADE of timestamp 1200 1204 6 8.3 120.0 0.3X +YEAR of timestamp 1185 1193 8 8.4 118.5 0.3X +ISOYEAR of timestamp 1449 1469 18 6.9 144.9 0.2X +QUARTER of timestamp 1497 1505 7 6.7 149.7 0.2X +MONTH of timestamp 1185 1188 3 8.4 118.5 0.3X +WEEK of timestamp 1901 1909 7 5.3 190.1 0.2X +DAY of timestamp 1278 1282 4 7.8 127.8 0.3X +DAYOFWEEK of timestamp 1371 1376 5 7.3 137.1 0.2X +DOW of timestamp 1361 1372 17 7.3 136.1 0.2X +ISODOW of timestamp 1299 1306 9 7.7 129.9 0.3X +DOY of timestamp 1216 1219 4 8.2 121.6 0.3X +HOUR of timestamp 352 356 5 28.4 35.2 0.9X +MINUTE of timestamp 350 369 17 28.6 35.0 0.9X +SECOND of timestamp 351 364 19 28.5 35.1 0.9X +MILLISECONDS of timestamp 36989 37022 52 0.3 3698.9 0.0X +MICROSECONDS of timestamp 473 476 2 21.1 47.3 0.7X +EPOCH of timestamp 29890 29908 27 0.3 2989.0 0.0X -Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 848 852 3 11.8 84.8 1.0X -MILLENNIUM of date 1083 1086 3 9.2 108.3 0.8X -CENTURY of date 1081 1084 5 9.3 108.1 0.8X -DECADE of date 1010 1014 4 9.9 101.0 0.8X -YEAR of date 992 1000 9 10.1 99.2 0.9X -ISOYEAR of date 1111 1116 6 9.0 111.1 0.8X -QUARTER of date 1230 1249 17 8.1 123.0 0.7X -MONTH of date 996 1008 11 10.0 99.6 0.9X -WEEK of date 1487 1516 28 6.7 148.7 0.6X -DAY of date 993 999 6 10.1 99.3 0.9X -DAYOFWEEK of date 1113 1118 5 9.0 111.3 0.8X -DOW of date 1113 1116 2 9.0 111.3 0.8X -ISODOW of date 1069 1072 3 9.4 106.9 0.8X -DOY of date 1027 1028 1 9.7 102.7 0.8X -HOUR of date 1707 1710 3 5.9 170.7 0.5X -MINUTE of date 1704 1705 2 5.9 170.4 0.5X -SECOND of date 1701 1705 4 5.9 170.1 0.5X -MILLISECONDS of date 2229 2238 9 4.5 222.9 0.4X -MICROSECONDS of date 1801 1808 12 5.6 180.1 0.5X -EPOCH of date 24783 24817 31 0.4 2478.3 0.0X +cast to date 1005 1006 1 9.9 100.5 1.0X +MILLENNIUM of date 1295 1300 5 7.7 129.5 0.8X +CENTURY of date 1297 1298 1 7.7 129.7 0.8X +DECADE of date 1198 1208 13 8.3 119.8 0.8X +YEAR of date 1184 1193 13 8.4 118.4 0.8X +ISOYEAR of date 1445 1460 13 6.9 144.5 0.7X +QUARTER of date 1495 1500 4 6.7 149.5 0.7X +MONTH of date 1176 1179 3 8.5 117.6 0.9X +WEEK of date 1893 1904 15 5.3 189.3 0.5X +DAY of date 1275 1283 8 7.8 127.5 0.8X +DAYOFWEEK of date 1369 1373 4 7.3 136.9 0.7X +DOW of date 1353 1354 2 7.4 135.3 0.7X +ISODOW of date 1290 1290 1 7.8 129.0 0.8X +DOY of date 1208 1212 4 8.3 120.8 0.8X +HOUR of date 1446 1449 2 6.9 144.6 0.7X +MINUTE of date 1441 1442 1 6.9 144.1 0.7X +SECOND of date 1443 1450 8 6.9 144.3 0.7X +MILLISECONDS of date 2087 2089 3 4.8 208.7 0.5X +MICROSECONDS of date 1557 1570 21 6.4 155.7 0.6X +EPOCH of date 30980 31001 32 0.3 3098.0 0.0X diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt index b06b5c092b61..5fa461fecd4d 100644 --- a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt @@ -2,12 +2,11 @@ Write primitive arrays in dataset ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz - -Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Int 437 / 529 19.2 52.1 1.0X -Double 638 / 670 13.1 76.1 0.7X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Write an array in Dataset: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Int 543 605 67 15.4 64.7 1.0X +Double 737 776 36 11.4 87.9 0.7X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index bd2470ee2066..df122977fe5f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -22,11 +22,10 @@ import scala.collection.JavaConverters._ import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.datasources.parquet.{SpecificParquetRecordReaderBase, VectorizedParquetRecordReader} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -44,21 +43,26 @@ import org.apache.spark.sql.vectorized.ColumnVector * Results will be written to "benchmarks/DataSourceReadBenchmark-results.txt". * }}} */ -object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { - val conf = new SparkConf() - .setAppName("DataSourceReadBenchmark") - // Since `spark.master` always exists, overrides this value - .set("spark.master", "local[1]") - .setIfMissing("spark.driver.memory", "3g") - .setIfMissing("spark.executor.memory", "3g") - .setIfMissing(UI_ENABLED, false) - - val spark = SparkSession.builder.config(conf).getOrCreate() - - // Set default configs. Individual cases will change them if necessary. - spark.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") - spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") - spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") +object DataSourceReadBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + .setAppName("DataSourceReadBenchmark") + // Since `spark.master` always exists, overrides this value + .set("spark.master", "local[1]") + .setIfMissing("spark.driver.memory", "3g") + .setIfMissing("spark.executor.memory", "3g") + .setIfMissing(UI_ENABLED, false) + + val sparkSession = SparkSession.builder.config(conf).getOrCreate() + + // Set default configs. Individual cases will change them if necessary. + sparkSession.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + sparkSession.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") + sparkSession.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") + + sparkSession + } def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala index b9086aa5cb37..a109b11b2d6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala @@ -19,9 +19,7 @@ package org.apache.spark.sql.execution.benchmark import java.time.Instant -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.internal.SQLConf /** @@ -36,11 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * Results will be written to "benchmarks/ExtractBenchmark-results.txt". * }}} */ -object ExtractBenchmark extends BenchmarkBase with SQLHelper { - private val spark: SparkSession = SparkSession.builder() - .master("local[1]") - .appName(this.getClass.getCanonicalName) - .getOrCreate() +object ExtractBenchmark extends SqlBasedBenchmark { private def doBenchmark(cardinality: Long, exprs: String*): Unit = { val sinceSecond = Instant.parse("2010-01-01T00:00:00Z").getEpochSecond diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index b04024371713..50ba50176c7f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -22,10 +22,9 @@ import java.io.File import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, SparkSession} -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.functions.monotonically_increasing_id import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType @@ -41,17 +40,21 @@ import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType * Results will be written to "benchmarks/FilterPushdownBenchmark-results.txt". * }}} */ -object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { - - private val conf = new SparkConf() - .setAppName(this.getClass.getSimpleName) - // Since `spark.master` always exists, overrides this value - .set("spark.master", "local[1]") - .setIfMissing("spark.driver.memory", "3g") - .setIfMissing("spark.executor.memory", "3g") - .setIfMissing(UI_ENABLED, false) - .setIfMissing("orc.compression", "snappy") - .setIfMissing("spark.sql.parquet.compression.codec", "snappy") +object FilterPushdownBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + .setAppName(this.getClass.getSimpleName) + // Since `spark.master` always exists, overrides this value + .set("spark.master", "local[1]") + .setIfMissing("spark.driver.memory", "3g") + .setIfMissing("spark.executor.memory", "3g") + .setIfMissing(UI_ENABLED, false) + .setIfMissing("orc.compression", "snappy") + .setIfMissing("spark.sql.parquet.compression.codec", "snappy") + + SparkSession.builder().config(conf).getOrCreate() + } private val numRows = 1024 * 1024 * 15 private val width = 5 @@ -59,8 +62,6 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { // For Parquet/ORC, we will use the same value for block size and compression size private val blockSize = org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE - private val spark = SparkSession.builder().config(conf).getOrCreate() - def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala index 8b1c422e63a3..e07921bf3aa7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.SparkSession /** @@ -28,13 +28,16 @@ import org.apache.spark.sql.SparkSession * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " * Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt". */ -object PrimitiveArrayBenchmark extends BenchmarkBase { - lazy val sparkSession = SparkSession.builder - .master("local[1]") - .appName("microbenchmark") - .config("spark.sql.shuffle.partitions", 1) - .config("spark.sql.autoBroadcastJoinThreshold", 1) - .getOrCreate() +object PrimitiveArrayBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + SparkSession.builder + .master("local[1]") + .appName("microbenchmark") + .config("spark.sql.shuffle.partitions", 1) + .config("spark.sql.autoBroadcastJoinThreshold", 1) + .getOrCreate() + } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Write primitive arrays in dataset") { @@ -43,11 +46,11 @@ object PrimitiveArrayBenchmark extends BenchmarkBase { } def writeDatasetArray(iters: Int): Unit = { - import sparkSession.implicits._ + import spark.implicits._ val count = 1024 * 1024 * 2 - val sc = sparkSession.sparkContext + val sc = spark.sparkContext val primitiveIntArray = Array.fill[Int](count)(65535) val dsInt = sc.parallelize(Seq(primitiveIntArray), 1).toDS dsInt.count // force to build dataset diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt index f3044da97249..0c394a340333 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -hive udaf vs spark af: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -hive udaf w/o group by 6370 / 6400 0.0 97193.6 1.0X -spark af w/o group by 54 / 63 1.2 820.8 118.4X -hive udaf w/ group by 4492 / 4507 0.0 68539.5 1.4X -spark af w/ group by w/o fallback 58 / 64 1.1 881.7 110.2X -spark af w/ group by w/ fallback 136 / 142 0.5 2075.0 46.8X +hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hive udaf w/o group by 6741 6759 22 0.0 102864.5 1.0X +spark af w/o group by 56 66 9 1.2 851.6 120.8X +hive udaf w/ group by 4610 4642 25 0.0 70350.3 1.5X +spark af w/ group by w/o fallback 60 67 8 1.1 916.7 112.2X +spark af w/ group by w/ fallback 135 144 9 0.5 2065.6 49.8X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 41500 / 41630 2.5 395.8 1.0X -object agg w/ group by w/o fallback 10075 / 10122 10.4 96.1 4.1X -object agg w/ group by w/ fallback 28131 / 28205 3.7 268.3 1.5X -sort agg w/o group by 6182 / 6221 17.0 59.0 6.7X -object agg w/o group by w/o fallback 5435 / 5468 19.3 51.8 7.6X +object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 41568 41894 461 2.5 396.4 1.0X +object agg w/ group by w/o fallback 10314 10494 149 10.2 98.4 4.0X +object agg w/ group by w/ fallback 26720 26951 326 3.9 254.8 1.6X +sort agg w/o group by 6638 6681 38 15.8 63.3 6.3X +object agg w/o group by w/o fallback 5665 5706 30 18.5 54.0 7.3X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 970 / 1025 2.2 462.5 1.0X -object agg w/ group by w/o fallback 772 / 798 2.7 368.1 1.3X -object agg w/ group by w/ fallback 1013 / 1044 2.1 483.1 1.0X -sort agg w/o group by 751 / 781 2.8 358.0 1.3X -object agg w/o group by w/o fallback 772 / 814 2.7 368.0 1.3X +object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 794 862 33 2.6 378.8 1.0X +object agg w/ group by w/o fallback 605 622 10 3.5 288.5 1.3X +object agg w/ group by w/ fallback 840 860 15 2.5 400.5 0.9X +sort agg w/o group by 555 570 12 3.8 264.6 1.4X +object agg w/o group by w/o fallback 544 562 12 3.9 259.6 1.5X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index caa78b9a8f10..c47cf27bf617 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,155 +2,155 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1725 / 1759 9.1 109.7 1.0X -Native ORC Vectorized 272 / 316 57.8 17.3 6.3X -Hive built-in ORC 1970 / 1987 8.0 125.3 0.9X +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1843 1958 162 8.5 117.2 1.0X +Native ORC Vectorized 321 355 31 48.9 20.4 5.7X +Hive built-in ORC 2143 2175 44 7.3 136.3 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1633 / 1672 9.6 103.8 1.0X -Native ORC Vectorized 238 / 255 66.0 15.1 6.9X -Hive built-in ORC 2293 / 2305 6.9 145.8 0.7X +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1987 2020 47 7.9 126.3 1.0X +Native ORC Vectorized 276 299 25 57.0 17.6 7.2X +Hive built-in ORC 2350 2357 10 6.7 149.4 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1677 / 1699 9.4 106.6 1.0X -Native ORC Vectorized 325 / 342 48.3 20.7 5.2X -Hive built-in ORC 2561 / 2569 6.1 162.8 0.7X +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2092 2115 32 7.5 133.0 1.0X +Native ORC Vectorized 360 373 18 43.6 22.9 5.8X +Hive built-in ORC 2550 2557 9 6.2 162.2 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1795 8.8 113.9 1.0X -Native ORC Vectorized 400 / 408 39.3 25.4 4.5X -Hive built-in ORC 2713 / 2720 5.8 172.5 0.7X +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2173 2188 21 7.2 138.2 1.0X +Native ORC Vectorized 435 448 14 36.2 27.7 5.0X +Hive built-in ORC 2683 2690 10 5.9 170.6 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1805 8.8 113.8 1.0X -Native ORC Vectorized 433 / 438 36.3 27.5 4.1X -Hive built-in ORC 2690 / 2803 5.8 171.0 0.7X +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2233 2323 127 7.0 142.0 1.0X +Native ORC Vectorized 475 483 13 33.1 30.2 4.7X +Hive built-in ORC 2605 2610 6 6.0 165.7 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1911 / 1930 8.2 121.5 1.0X -Native ORC Vectorized 543 / 552 29.0 34.5 3.5X -Hive built-in ORC 2967 / 3065 5.3 188.6 0.6X +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2367 2384 24 6.6 150.5 1.0X +Native ORC Vectorized 600 641 69 26.2 38.1 3.9X +Hive built-in ORC 2860 2877 24 5.5 181.9 0.8X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 4160 / 4188 2.5 396.7 1.0X -Native ORC Vectorized 2405 / 2406 4.4 229.4 1.7X -Hive built-in ORC 5514 / 5562 1.9 525.9 0.8X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 4253 4330 108 2.5 405.6 1.0X +Native ORC Vectorized 2295 2301 8 4.6 218.9 1.9X +Hive built-in ORC 5364 5465 144 2.0 511.5 0.8X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column - Native ORC MR 1863 / 1867 8.4 118.4 1.0X -Data column - Native ORC Vectorized 411 / 418 38.2 26.2 4.5X -Data column - Hive built-in ORC 3297 / 3308 4.8 209.6 0.6X -Partition column - Native ORC MR 1505 / 1506 10.4 95.7 1.2X -Partition column - Native ORC Vectorized 80 / 93 195.6 5.1 23.2X -Partition column - Hive built-in ORC 1960 / 1979 8.0 124.6 1.0X -Both columns - Native ORC MR 2076 / 2090 7.6 132.0 0.9X -Both columns - Native ORC Vectorized 450 / 463 34.9 28.6 4.1X -Both columns - Hive built-in ORC 3528 / 3548 4.5 224.3 0.5X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column - Native ORC MR 2443 2448 6 6.4 155.3 1.0X +Data column - Native ORC Vectorized 446 473 44 35.3 28.3 5.5X +Data column - Hive built-in ORC 2868 2877 12 5.5 182.4 0.9X +Partition column - Native ORC MR 1623 1656 47 9.7 103.2 1.5X +Partition column - Native ORC Vectorized 112 121 14 140.8 7.1 21.9X +Partition column - Hive built-in ORC 1846 1850 5 8.5 117.4 1.3X +Both columns - Native ORC MR 2610 2635 36 6.0 165.9 0.9X +Both columns - Native ORC Vectorized 492 508 19 32.0 31.3 5.0X +Both columns - Hive built-in ORC 2969 2973 4 5.3 188.8 0.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1727 / 1733 6.1 164.7 1.0X -Native ORC Vectorized 375 / 379 28.0 35.7 4.6X -Hive built-in ORC 2665 / 2666 3.9 254.2 0.6X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2056 2064 11 5.1 196.1 1.0X +Native ORC Vectorized 415 421 7 25.3 39.6 5.0X +Hive built-in ORC 2710 2722 17 3.9 258.4 0.8X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3324 / 3325 3.2 317.0 1.0X -Native ORC Vectorized 1085 / 1106 9.7 103.4 3.1X -Hive built-in ORC 5272 / 5299 2.0 502.8 0.6X +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3655 3674 27 2.9 348.6 1.0X +Native ORC Vectorized 1166 1167 1 9.0 111.2 3.1X +Hive built-in ORC 5268 5305 52 2.0 502.4 0.7X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3045 / 3046 3.4 290.4 1.0X -Native ORC Vectorized 1248 / 1260 8.4 119.0 2.4X -Hive built-in ORC 3989 / 3999 2.6 380.4 0.8X +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3447 3467 27 3.0 328.8 1.0X +Native ORC Vectorized 1222 1223 1 8.6 116.6 2.8X +Hive built-in ORC 3947 3959 18 2.7 376.4 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1692 / 1694 6.2 161.3 1.0X -Native ORC Vectorized 471 / 493 22.3 44.9 3.6X -Hive built-in ORC 2398 / 2411 4.4 228.7 0.7X +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1912 1917 6 5.5 182.4 1.0X +Native ORC Vectorized 477 484 5 22.0 45.5 4.0X +Hive built-in ORC 2374 2386 17 4.4 226.4 0.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1371 / 1379 0.8 1307.5 1.0X -Native ORC Vectorized 121 / 135 8.6 115.8 11.3X -Hive built-in ORC 521 / 561 2.0 497.1 2.6X +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 290 350 102 3.6 276.1 1.0X +Native ORC Vectorized 155 166 15 6.7 148.2 1.9X +Hive built-in ORC 520 531 8 2.0 495.8 0.6X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 2711 / 2767 0.4 2585.5 1.0X -Native ORC Vectorized 210 / 232 5.0 200.5 12.9X -Hive built-in ORC 764 / 775 1.4 728.3 3.5X +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 365 406 73 2.9 347.9 1.0X +Native ORC Vectorized 232 246 20 4.5 221.6 1.6X +Hive built-in ORC 794 864 62 1.3 757.6 0.5X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3979 / 3988 0.3 3794.4 1.0X -Native ORC Vectorized 357 / 366 2.9 340.2 11.2X -Hive built-in ORC 1091 / 1095 1.0 1040.5 3.6X +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 501 544 40 2.1 477.6 1.0X +Native ORC Vectorized 365 386 33 2.9 348.0 1.4X +Hive built-in ORC 1153 1153 0 0.9 1099.8 0.4X diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala index 3226e3a5f318..c475c7b21ab9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala @@ -21,11 +21,10 @@ import scala.concurrent.duration._ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, SparkSession} import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.hive.execution.TestingTypedCount import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf @@ -44,9 +43,10 @@ import org.apache.spark.sql.types.LongType * Results will be written to "benchmarks/ObjectHashAggregateExecBenchmark-results.txt". * }}} */ -object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { +object ObjectHashAggregateExecBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = TestHive.sparkSession - private val spark: SparkSession = TestHive.sparkSession private val sql = spark.sql _ import spark.implicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index c03ae144a159..f28b9be60d3c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -22,9 +22,9 @@ import java.io.File import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{DataFrame, SparkSession} -import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -43,18 +43,23 @@ import org.apache.spark.sql.types._ * This is in `sql/hive` module in order to compare `sql/core` and `sql/hive` ORC data sources. */ // scalastyle:off line.size.limit -object OrcReadBenchmark extends BenchmarkBase with SQLHelper { - val conf = new SparkConf() - conf.set("orc.compression", "snappy") - - private val spark = SparkSession.builder() - .master("local[1]") - .appName("OrcReadBenchmark") - .config(conf) - .getOrCreate() - - // Set default configs. Individual cases will change them if necessary. - spark.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") +object OrcReadBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + conf.set("orc.compression", "snappy") + + val sparkSession = SparkSession.builder() + .master("local[1]") + .appName("OrcReadBenchmark") + .config(conf) + .getOrCreate() + + // Set default configs. Individual cases will change them if necessary. + sparkSession.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + + sparkSession + } def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView)