Skip to content

Commit

Permalink
Run benchmark after generating the data through databricks/spark-sql-perf.
Browse files Browse the repository at this point in the history

Run command:
```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.TPCDSQueryBenchmark --data-location hdfs://myhdfs:9000/user/hive/warehouse/tpcds5t --cbo"
```

For more details, see databricks/spark-sql-perf#189.
  • Loading branch information
wangyum committed Nov 1, 2024
1 parent 4205b79 commit 94c8484
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,19 @@ class KryoSerializer(conf: SparkConf)
kryo.register(Utils.classForName("scala.reflect.ClassTag$GenericClassTag"))
kryo.register(classOf[ArrayBuffer[Any]])
kryo.register(classOf[Array[Array[Byte]]])
kryo.register(classOf[org.apache.hadoop.fs.Path])
kryo.register(classOf[Array[org.apache.hadoop.fs.Path]])
kryo.register(classOf[org.apache.hadoop.fs.FileStatus])
kryo.register(classOf[Array[org.apache.hadoop.fs.FileStatus]])
kryo.register(classOf[org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus])
kryo.register(Utils.classForName("java.util.Collections$EmptySet"))
kryo.register(classOf[org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy])
kryo.register(classOf[Array[org.apache.hadoop.fs.BlockLocation]])
kryo.register(classOf[org.apache.hadoop.fs.BlockLocation])
kryo.register(classOf[Array[org.apache.hadoop.fs.StorageType]])
kryo.register(classOf[org.apache.hadoop.fs.StorageType])
kryo.register(classOf[org.apache.hadoop.hdfs.protocol.FsPermissionExtension])
kryo.register(classOf[org.apache.hadoop.fs.permission.FsAction])

// We can't load these classes directly, in order to avoid unnecessary jar dependencies.
// We load them safely and ignore any class that is not found.
Expand Down
14 changes: 7 additions & 7 deletions sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ trait TPCDSSchema {
|`ss_addr_sk` INT,
|`ss_store_sk` INT,
|`ss_promo_sk` INT,
|`ss_ticket_number` INT,
|`ss_ticket_number` BIGINT,
|`ss_quantity` INT,
|`ss_wholesale_cost` DECIMAL(7,2),
|`ss_list_price` DECIMAL(7,2),
Expand All @@ -81,7 +81,7 @@ trait TPCDSSchema {
|`sr_addr_sk` INT,
|`sr_store_sk` INT,
|`sr_reason_sk` INT,
|`sr_ticket_number` INT,
|`sr_ticket_number` BIGINT,
|`sr_return_quantity` INT,
|`sr_return_amt` DECIMAL(7,2),
|`sr_return_tax` DECIMAL(7,2),
Expand Down Expand Up @@ -112,7 +112,7 @@ trait TPCDSSchema {
|`cs_warehouse_sk` INT,
|`cs_item_sk` INT,
|`cs_promo_sk` INT,
|`cs_order_number` INT,
|`cs_order_number` BIGINT,
|`cs_quantity` INT,
|`cs_wholesale_cost` DECIMAL(7,2),
|`cs_list_price` DECIMAL(7,2),
Expand Down Expand Up @@ -147,7 +147,7 @@ trait TPCDSSchema {
|`cr_catalog_page_sk` INT,
|`cr_ship_mode_sk` INT,
|`cr_warehouse_sk` INT,
|`cr_reason_sk` INT,`cr_order_number` INT,
|`cr_reason_sk` INT,`cr_order_number` BIGINT,
|`cr_return_quantity` INT,
|`cr_return_amount` DECIMAL(7,2),
|`cr_return_tax` DECIMAL(7,2),
Expand Down Expand Up @@ -178,7 +178,7 @@ trait TPCDSSchema {
|`ws_ship_mode_sk` INT,
|`ws_warehouse_sk` INT,
|`ws_promo_sk` INT,
|`ws_order_number` INT,
|`ws_order_number` BIGINT,
|`ws_quantity` INT,
|`ws_wholesale_cost` DECIMAL(7,2),
|`ws_list_price` DECIMAL(7,2),
Expand Down Expand Up @@ -211,7 +211,7 @@ trait TPCDSSchema {
|`wr_returning_addr_sk` INT,
|`wr_web_page_sk` INT,
|`wr_reason_sk` INT,
|`wr_order_number` INT,
|`wr_order_number` BIGINT,
|`wr_return_quantity` INT,
|`wr_return_amt` DECIMAL(7,2),
|`wr_return_tax` DECIMAL(7,2),
Expand Down Expand Up @@ -390,7 +390,7 @@ trait TPCDSSchema {
|`c_birth_country` VARCHAR(20),
|`c_login` CHAR(13),
|`c_email_address` CHAR(50),
|`c_last_review_date` INT
|`c_last_review_date` STRING
""".stripMargin,
"customer_address" ->
"""
Expand Down

0 comments on commit 94c8484

Please sign in to comment.