From e9cf4aec0b26129d078ebbbd3aa3c1b998fccf12 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 22 Aug 2024 06:03:40 -0600 Subject: [PATCH 1/5] Add benchmarks for Spark Scan + Comet Exec --- .../benchmark/CometAggregateBenchmark.scala | 2 +- .../benchmark/CometArithmeticBenchmark.scala | 3 +-- .../CometConditionalExpressionBenchmark.scala | 3 +-- .../CometDatetimeExpressionBenchmark.scala | 3 +-- .../sql/benchmark/CometExecBenchmark.scala | 3 +-- .../CometPredicateExpressionBenchmark.scala | 3 +-- .../sql/benchmark/CometReadBenchmark.scala | 3 +-- .../sql/benchmark/CometShuffleBenchmark.scala | 3 +-- .../CometStringExpressionBenchmark.scala | 3 +-- .../benchmark/CometTPCDSMicroBenchmark.scala | 20 +++++++++++++++---- .../benchmark/CometTPCDSQueryBenchmark.scala | 2 +- .../benchmark/CometTPCHQueryBenchmark.scala | 2 +- .../CometTPCQueryBenchmarkBase.scala | 17 +++++++++++++--- 13 files changed, 41 insertions(+), 26 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala index 86b59050e..916f36f5f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala @@ -28,7 +28,7 @@ import org.apache.comet.CometConf /** * Benchmark to measure Comet execution performance. To run this benchmark: * {{{ - * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometAggregateBenchmark + * make benchmark-org.apache.spark.sql.benchmark.CometAggregateBenchmark * }}} * * Results will be written to "spark/benchmarks/CometAggregateBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala index c6fe55b56..1e78c8af2 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala @@ -25,8 +25,7 @@ import org.apache.spark.sql.types._ import org.apache.comet.CometConf /** - * Benchmark to measure Comet expression evaluation performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet expression evaluation performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometArithmeticBenchmark` Results will be written to * "spark/benchmarks/CometArithmeticBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala index 0dddfb36a..31adc4518 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala @@ -24,8 +24,7 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet execution performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometConditionalExpressionBenchmark` Results will be * written to "spark/benchmarks/CometConditionalExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala index 0af1ecade..7a95471cf 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala @@ -23,8 +23,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone import org.apache.spark.sql.internal.SQLConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet execution performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometDatetimeExpressionBenchmark` Results will be * written to "spark/benchmarks/CometDatetimeExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala index 400d9b829..cc219158f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala @@ -27,8 +27,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, CometSparkSessionExtensions} /** - * Benchmark to measure Comet execution performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet execution performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometExecBenchmark` Results will be written to * "spark/benchmarks/CometExecBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala index 2ca924821..fce81f860 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala @@ -24,8 +24,7 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet execution performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometPredicateExpressionBenchmark` Results will be * written to "spark/benchmarks/CometPredicateExpressionBenchmark -**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala index b47de19ba..ef578dcc4 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala @@ -34,8 +34,7 @@ import org.apache.comet.CometConf import org.apache.comet.parquet.BatchReader /** - * Benchmark to measure Comet read performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet read performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometReadBenchmark` Results will be written to * "spark/benchmarks/CometReadBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala index 46af7115c..09041a692 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala @@ -29,8 +29,7 @@ import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions /** - * Benchmark to measure Comet shuffle performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet shuffle performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometShuffleBenchmark` Results will be written to * "spark/benchmarks/CometShuffleBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 0546c9173..376fee6ca 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -24,8 +24,7 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: - * `SPARK_GENERATE_BENCHMARK_FILES=1 make + * Benchmark to measure Comet execution performance. To run this benchmark: `make * benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark` Results will be * written to "spark/benchmarks/CometStringExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala index 016c2a371..37e2b3734 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala @@ -45,7 +45,7 @@ import org.apache.comet.CometConf * make benchmark-org.apache.spark.sql.GenTPCDSData -- --dsdgenDir /tmp/tpcds-kit/tools --location /tmp/tpcds --scaleFactor 1 * * // CometTPCDSMicroBenchmark - * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCDSMicroBenchmark -- --data-location /tmp/tpcds + * make benchmark-org.apache.spark.sql.benchmark.CometTPCDSMicroBenchmark -- --data-location /tmp/tpcds * }}} * * Results will be written to "spark/benchmarks/CometTPCDSMicroBenchmark-**results.txt". @@ -104,15 +104,27 @@ object CometTPCDSMicroBenchmark extends CometTPCQueryBenchmarkBase { } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum val benchmark = new Benchmark(benchmarkName, numRows, 2, output = output) - benchmark.addCase(s"$name$nameSuffix") { _ => + benchmark.addCase(s"$name$nameSuffix: Spark Scan + Spark Exec") { _ => cometSpark.sql(queryString).noop() } - benchmark.addCase(s"$name$nameSuffix: Comet (Scan)") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet Scan + Spark Exec") { _ => withSQLConf(CometConf.COMET_ENABLED.key -> "true") { cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Comet (Scan, Exec)") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet Scan + Comet Exec") { _ => + withSQLConf( + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", + CometConf.COMET_SHUFFLE_MODE.key -> "auto", + CometConf.COMET_REGEXP_ALLOW_INCOMPATIBLE.key -> "true", + // enabling COMET_EXPLAIN_NATIVE_ENABLED may add overhead but is useful for debugging + CometConf.COMET_EXPLAIN_NATIVE_ENABLED.key -> "false", + CometConf.COMET_EXEC_ENABLED.key -> "true") { + cometSpark.sql(queryString).noop() + } + } + benchmark.addCase(s"$name$nameSuffix: Spark Scan + Comet Exec") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala index 1be5d685d..ca845f5a3 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.StructType * make benchmark-org.apache.spark.sql.GenTPCDSData -- --dsdgenDir /tmp/tpcds-kit/tools --location /tmp/tpcds --scaleFactor 1 * * // CometTPCDSQueryBenchmark - * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCDSQueryBenchmark -- --data-location /tmp/tpcds + * make benchmark-org.apache.spark.sql.benchmark.CometTPCDSQueryBenchmark -- --data-location /tmp/tpcds * }}} * * Results will be written to "spark/benchmarks/CometTPCDSQueryBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala index af1ee3a49..c5c8a2301 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala @@ -34,7 +34,7 @@ import java.util.Locale * make benchmark-org.apache.spark.sql.GenTPCHData -- --location /tmp --scaleFactor ${scale_factor} * * // CometTPCHQueryBenchmark - * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCHQueryBenchmark -- --data-location /tmp/tpch/sf${scale_factor}_parquet + * make benchmark-org.apache.spark.sql.benchmark.CometTPCHQueryBenchmark -- --data-location /tmp/tpch/sf${scale_factor}_parquet * }}} * * Results will be written to "spark/benchmarks/CometTPCHQueryBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala index 2361346b8..bf481cfa9 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala @@ -62,15 +62,15 @@ trait CometTPCQueryBenchmarkBase extends SqlBasedBenchmark with CometTPCQueryBas } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum val benchmark = new Benchmark(benchmarkName, numRows, 2, output = output) - benchmark.addCase(s"$name$nameSuffix") { _ => + benchmark.addCase(s"$name$nameSuffix: Spark Scan + Spark Exec") { _ => cometSpark.sql(queryString).noop() } - benchmark.addCase(s"$name$nameSuffix: Comet (Scan)") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet Scan + Spark Exec") { _ => withSQLConf(CometConf.COMET_ENABLED.key -> "true") { cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Comet (Scan, Exec)") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet Scan + Comet Exec") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", @@ -79,6 +79,17 @@ trait CometTPCQueryBenchmarkBase extends SqlBasedBenchmark with CometTPCQueryBas cometSpark.sql(queryString).noop() } } + benchmark.addCase(s"$name$nameSuffix: Spark Scan + Comet Exec") { _ => + withSQLConf( + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_EXEC_ENABLED.key -> "true", + CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false", + CometConf.COMET_CONVERT_FROM_PARQUET_ENABLED.key -> "true", + CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", + CometConf.COMET_SHUFFLE_MODE.key -> "auto") { + cometSpark.sql(queryString).noop() + } + } benchmark.run() } } From 2257567a3d3a64d9cee7ab9efde867e97beedf7c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 22 Aug 2024 21:58:02 -0600 Subject: [PATCH 2/5] address feedback --- .../spark/sql/benchmark/CometTPCDSMicroBenchmark.scala | 8 ++++---- .../spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala index 37e2b3734..f40e4d37c 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala @@ -104,15 +104,15 @@ object CometTPCDSMicroBenchmark extends CometTPCQueryBenchmarkBase { } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum val benchmark = new Benchmark(benchmarkName, numRows, 2, output = output) - benchmark.addCase(s"$name$nameSuffix: Spark Scan + Spark Exec") { _ => + benchmark.addCase(s"$name$nameSuffix") { _ => cometSpark.sql(queryString).noop() } - benchmark.addCase(s"$name$nameSuffix: Comet Scan + Spark Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Scan)") { _ => withSQLConf(CometConf.COMET_ENABLED.key -> "true") { cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Comet Scan + Comet Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Scan, Exec)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", @@ -124,7 +124,7 @@ object CometTPCDSMicroBenchmark extends CometTPCQueryBenchmarkBase { cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Spark Scan + Comet Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Exec)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala index bf481cfa9..c9ec4ed6e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala @@ -65,12 +65,12 @@ trait CometTPCQueryBenchmarkBase extends SqlBasedBenchmark with CometTPCQueryBas benchmark.addCase(s"$name$nameSuffix: Spark Scan + Spark Exec") { _ => cometSpark.sql(queryString).noop() } - benchmark.addCase(s"$name$nameSuffix: Comet Scan + Spark Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Scan)") { _ => withSQLConf(CometConf.COMET_ENABLED.key -> "true") { cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Comet Scan + Comet Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Scan, Exec)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", @@ -79,7 +79,7 @@ trait CometTPCQueryBenchmarkBase extends SqlBasedBenchmark with CometTPCQueryBas cometSpark.sql(queryString).noop() } } - benchmark.addCase(s"$name$nameSuffix: Spark Scan + Comet Exec") { _ => + benchmark.addCase(s"$name$nameSuffix: Comet (Exec)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", From 3b61edd0f33e43ef922c07eb0758d687fe77400c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 22 Aug 2024 21:58:40 -0600 Subject: [PATCH 3/5] address feedback --- .../apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala index c9ec4ed6e..7e9bdbc9e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCQueryBenchmarkBase.scala @@ -62,7 +62,7 @@ trait CometTPCQueryBenchmarkBase extends SqlBasedBenchmark with CometTPCQueryBas } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum val benchmark = new Benchmark(benchmarkName, numRows, 2, output = output) - benchmark.addCase(s"$name$nameSuffix: Spark Scan + Spark Exec") { _ => + benchmark.addCase(s"$name$nameSuffix") { _ => cometSpark.sql(queryString).noop() } benchmark.addCase(s"$name$nameSuffix: Comet (Scan)") { _ => From ccc4f174e3e3937a6095ca6461ab949fd1dc8c3e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Aug 2024 06:48:09 -0600 Subject: [PATCH 4/5] revert removing env var from usage examples --- .../apache/spark/sql/benchmark/CometAggregateBenchmark.scala | 2 +- .../apache/spark/sql/benchmark/CometArithmeticBenchmark.scala | 3 ++- .../sql/benchmark/CometConditionalExpressionBenchmark.scala | 3 ++- .../spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala | 3 ++- .../org/apache/spark/sql/benchmark/CometExecBenchmark.scala | 3 ++- .../sql/benchmark/CometPredicateExpressionBenchmark.scala | 3 ++- .../org/apache/spark/sql/benchmark/CometReadBenchmark.scala | 3 ++- .../org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala | 3 ++- .../spark/sql/benchmark/CometStringExpressionBenchmark.scala | 3 ++- .../apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala | 2 +- .../apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala | 2 +- .../apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala | 2 +- 12 files changed, 20 insertions(+), 12 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala index 916f36f5f..86b59050e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometAggregateBenchmark.scala @@ -28,7 +28,7 @@ import org.apache.comet.CometConf /** * Benchmark to measure Comet execution performance. To run this benchmark: * {{{ - * make benchmark-org.apache.spark.sql.benchmark.CometAggregateBenchmark + * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometAggregateBenchmark * }}} * * Results will be written to "spark/benchmarks/CometAggregateBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala index 1e78c8af2..c6fe55b56 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala @@ -25,7 +25,8 @@ import org.apache.spark.sql.types._ import org.apache.comet.CometConf /** - * Benchmark to measure Comet expression evaluation performance. To run this benchmark: `make + * Benchmark to measure Comet expression evaluation performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometArithmeticBenchmark` Results will be written to * "spark/benchmarks/CometArithmeticBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala index 31adc4518..0dddfb36a 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala @@ -24,7 +24,8 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: `make + * Benchmark to measure Comet execution performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometConditionalExpressionBenchmark` Results will be * written to "spark/benchmarks/CometConditionalExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala index 7a95471cf..0af1ecade 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala @@ -23,7 +23,8 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone import org.apache.spark.sql.internal.SQLConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: `make + * Benchmark to measure Comet execution performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometDatetimeExpressionBenchmark` Results will be * written to "spark/benchmarks/CometDatetimeExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala index cc219158f..400d9b829 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala @@ -27,7 +27,8 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.comet.{CometConf, CometSparkSessionExtensions} /** - * Benchmark to measure Comet execution performance. To run this benchmark: `make + * Benchmark to measure Comet execution performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometExecBenchmark` Results will be written to * "spark/benchmarks/CometExecBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala index fce81f860..2ca924821 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala @@ -24,7 +24,8 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: `make + * Benchmark to measure Comet execution performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometPredicateExpressionBenchmark` Results will be * written to "spark/benchmarks/CometPredicateExpressionBenchmark -**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala index ef578dcc4..b47de19ba 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometReadBenchmark.scala @@ -34,7 +34,8 @@ import org.apache.comet.CometConf import org.apache.comet.parquet.BatchReader /** - * Benchmark to measure Comet read performance. To run this benchmark: `make + * Benchmark to measure Comet read performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometReadBenchmark` Results will be written to * "spark/benchmarks/CometReadBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala index 09041a692..46af7115c 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometShuffleBenchmark.scala @@ -29,7 +29,8 @@ import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions /** - * Benchmark to measure Comet shuffle performance. To run this benchmark: `make + * Benchmark to measure Comet shuffle performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometShuffleBenchmark` Results will be written to * "spark/benchmarks/CometShuffleBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 376fee6ca..0546c9173 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -24,7 +24,8 @@ import org.apache.spark.benchmark.Benchmark import org.apache.comet.CometConf /** - * Benchmark to measure Comet execution performance. To run this benchmark: `make + * Benchmark to measure Comet execution performance. To run this benchmark: + * `SPARK_GENERATE_BENCHMARK_FILES=1 make * benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark` Results will be * written to "spark/benchmarks/CometStringExpressionBenchmark-**results.txt". */ diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala index f40e4d37c..d007c73e6 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala @@ -45,7 +45,7 @@ import org.apache.comet.CometConf * make benchmark-org.apache.spark.sql.GenTPCDSData -- --dsdgenDir /tmp/tpcds-kit/tools --location /tmp/tpcds --scaleFactor 1 * * // CometTPCDSMicroBenchmark - * make benchmark-org.apache.spark.sql.benchmark.CometTPCDSMicroBenchmark -- --data-location /tmp/tpcds + * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCDSMicroBenchmark -- --data-location /tmp/tpcds * }}} * * Results will be written to "spark/benchmarks/CometTPCDSMicroBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala index ca845f5a3..1be5d685d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSQueryBenchmark.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.StructType * make benchmark-org.apache.spark.sql.GenTPCDSData -- --dsdgenDir /tmp/tpcds-kit/tools --location /tmp/tpcds --scaleFactor 1 * * // CometTPCDSQueryBenchmark - * make benchmark-org.apache.spark.sql.benchmark.CometTPCDSQueryBenchmark -- --data-location /tmp/tpcds + * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCDSQueryBenchmark -- --data-location /tmp/tpcds * }}} * * Results will be written to "spark/benchmarks/CometTPCDSQueryBenchmark-**results.txt". diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala index c5c8a2301..af1ee3a49 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCHQueryBenchmark.scala @@ -34,7 +34,7 @@ import java.util.Locale * make benchmark-org.apache.spark.sql.GenTPCHData -- --location /tmp --scaleFactor ${scale_factor} * * // CometTPCHQueryBenchmark - * make benchmark-org.apache.spark.sql.benchmark.CometTPCHQueryBenchmark -- --data-location /tmp/tpch/sf${scale_factor}_parquet + * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometTPCHQueryBenchmark -- --data-location /tmp/tpch/sf${scale_factor}_parquet * }}} * * Results will be written to "spark/benchmarks/CometTPCHQueryBenchmark-**results.txt". From 75f94c4cbdb70d844f037fbf1e0eb605eb007e7b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Aug 2024 06:49:34 -0600 Subject: [PATCH 5/5] fix --- .../apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala index d007c73e6..40a84a125 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometTPCDSMicroBenchmark.scala @@ -127,6 +127,8 @@ object CometTPCDSMicroBenchmark extends CometTPCQueryBenchmarkBase { benchmark.addCase(s"$name$nameSuffix: Comet (Exec)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false", + CometConf.COMET_CONVERT_FROM_PARQUET_ENABLED.key -> "true", CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", CometConf.COMET_SHUFFLE_MODE.key -> "auto", CometConf.COMET_REGEXP_ALLOW_INCOMPATIBLE.key -> "true",