Skip to content

Commit b378076

Browse files
committed
chore: Add TPCDS benchmarks
1 parent 3948b6a commit b378076

File tree

5 files changed

+48
-30
lines changed

5 files changed

+48
-30
lines changed

benchmarks/bench.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,7 @@ run_tpcds() {
658658
echo "RESULTS_FILE: ${RESULTS_FILE}"
659659
echo "Running tpcds benchmark..."
660660

661-
debug_run $CARGO_COMMAND --bin tpcds -- benchmark datafusion --iterations 5 --path "${TPCDS_DIR}" --query_path "${SCRIPT_DIR}/queries/tpcds" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG}
661+
debug_run $CARGO_COMMAND --bin dfbench -- tpcds --iterations 5 --path "${TPCDS_DIR}" --query_path "${SCRIPT_DIR}/queries/tpcds" --prefer_hash_join "${PREFER_HASH_JOIN}" -o "${RESULTS_FILE}" ${QUERY_ARG}
662662
}
663663

664664
# Runs the compile profile benchmark helper

benchmarks/src/bin/dfbench.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
3434
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
3535

3636
use datafusion_benchmarks::{
37-
cancellation, clickbench, h2o, hj, imdb, nlj, sort_tpch, tpch, tpcds
37+
cancellation, clickbench, h2o, hj, imdb, nlj, sort_tpch, tpcds, tpch,
3838
};
3939

4040
#[derive(Debug, StructOpt)]

benchmarks/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@ pub mod hj;
2323
pub mod imdb;
2424
pub mod nlj;
2525
pub mod sort_tpch;
26+
pub mod tpcds;
2627
pub mod tpch;
2728
pub mod util;

benchmarks/src/tpcds/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
mod run;
2-
pub use run::RunOpt;
2+
pub use run::RunOpt;

benchmarks/src/tpcds/run.rs

Lines changed: 44 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,15 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::fs;
1819
use std::path::PathBuf;
1920
use std::sync::Arc;
2021

2122
use crate::util::{print_memory_stats, BenchmarkRun, CommonOpt, QueryResult};
2223

2324
use arrow::record_batch::RecordBatch;
2425
use arrow::util::pretty::{self, pretty_format_batches};
25-
use datafusion::datasource::file_format::csv::CsvFormat;
2626
use datafusion::datasource::file_format::parquet::ParquetFormat;
27-
use datafusion::datasource::file_format::FileFormat;
2827
use datafusion::datasource::listing::{
2928
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
3029
};
@@ -35,7 +34,7 @@ use datafusion::physical_plan::{collect, displayable};
3534
use datafusion::prelude::*;
3635
use datafusion_common::instant::Instant;
3736
use datafusion_common::utils::get_available_parallelism;
38-
use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION};
37+
use datafusion_common::{plan_err, DEFAULT_PARQUET_EXTENSION};
3938

4039
use log::info;
4140
use structopt::StructOpt;
@@ -46,32 +45,49 @@ pub const TPCDS_QUERY_START_ID: usize = 1;
4645
pub const TPCDS_QUERY_END_ID: usize = 99;
4746

4847
pub const TPCDS_TABLES: &[&str] = &[
49-
"call_center", "customer_address", "household_demographics", "promotion", "store_sales", "web_page",
50-
"catalog_page", "customer_demographics", "income_band", "reason", "store", "web_returns",
51-
"catalog_returns", "customer", "inventory", "ship_mode", "time_dim", "web_sales",
52-
"catalog_sales", "date_dim", "item", "store_returns", "warehouse", "web_site",
48+
"call_center",
49+
"customer_address",
50+
"household_demographics",
51+
"promotion",
52+
"store_sales",
53+
"web_page",
54+
"catalog_page",
55+
"customer_demographics",
56+
"income_band",
57+
"reason",
58+
"store",
59+
"web_returns",
60+
"catalog_returns",
61+
"customer",
62+
"inventory",
63+
"ship_mode",
64+
"time_dim",
65+
"web_sales",
66+
"catalog_sales",
67+
"date_dim",
68+
"item",
69+
"store_returns",
70+
"warehouse",
71+
"web_site",
5372
];
5473

5574
/// Get the SQL statements from the specified query file
56-
pub fn get_query_sql(query: usize) -> Result<Vec<String>> {
75+
pub fn get_query_sql(base_query_path: &str, query: usize) -> Result<Vec<String>> {
5776
if query > 0 && query < 100 {
58-
let possibilities = vec![
59-
format!("queries/q{query}.sql"),
60-
];
77+
let filename = format!("{base_query_path}/q{query}.sql");
6178
let mut errors = vec![];
62-
for filename in possibilities {
63-
match fs::read_to_string(&filename) {
64-
Ok(contents) => {
65-
return Ok(contents
66-
.split(';')
67-
.map(|s| s.trim())
68-
.filter(|s| !s.is_empty())
69-
.map(|s| s.to_string())
70-
.collect());
71-
}
72-
Err(e) => errors.push(format!("{filename}: {e}")),
73-
};
74-
}
79+
match fs::read_to_string(&filename) {
80+
Ok(contents) => {
81+
return Ok(contents
82+
.split(';')
83+
.map(|s| s.trim())
84+
.filter(|s| !s.is_empty())
85+
.map(|s| s.to_string())
86+
.collect());
87+
}
88+
Err(e) => errors.push(format!("{filename}: {e}")),
89+
};
90+
7591
plan_err!("invalid query. Could not find query: {:?}", errors)
7692
} else {
7793
plan_err!("invalid query. Expected value between 1 and 99")
@@ -180,7 +196,7 @@ impl RunOpt {
180196
// run benchmark
181197
let mut query_results = vec![];
182198

183-
let sql = &get_query_sql(query_id)?;
199+
let sql = &get_query_sql(self.query_path.to_str().unwrap(), query_id)?;
184200

185201
for i in 0..self.iterations() {
186202
let start = Instant::now();
@@ -296,11 +312,12 @@ impl RunOpt {
296312
// Obtain a snapshot of the SessionState
297313
let state = ctx.state();
298314
let path = format!("{path}/{table}");
299-
let format = ParquetFormat::default().with_options(ctx.state().table_options().parquet.clone());
315+
let format = ParquetFormat::default()
316+
.with_options(ctx.state().table_options().parquet.clone());
300317
let extension = DEFAULT_PARQUET_EXTENSION;
301318

302319
let table_path = ListingTableUrl::parse(path)?;
303-
let options = ListingOptions::new(format)
320+
let options = ListingOptions::new(Arc::new(format))
304321
.with_file_extension(extension)
305322
.with_target_partitions(target_partitions)
306323
.with_collect_stat(state.config().collect_statistics());

0 commit comments

Comments
 (0)