
Commit 15f810c

Merge branch 'main' into aa/string_builder_char
2 parents 1c49a7b + 5a2933e commit 15f810c


43 files changed (+1682 -720 lines)


.github/workflows/extended.yml

Lines changed: 1 addition & 1 deletion
@@ -136,7 +136,7 @@ jobs:
       - name: Run tests
         run: |
           cd datafusion
-          cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --features=force_hash_collisions,avro
+          cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --exclude datafusion-cli --workspace --lib --tests --features=force_hash_collisions,avro
           cargo clean

   sqllogictest-sqlite:
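Note: with --exclude datafusion-cli added here, the --workspace test invocation in this job no longer builds or runs the CLI crate's tests. A minimal sketch of exercising that crate on its own, assuming the same checkout layout and the ci profile defined by the workspace:

    # hypothetical stand-alone invocation of only the datafusion-cli package tests
    cd datafusion
    cargo test --profile ci -p datafusion-cli --lib --tests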

Cargo.lock

Lines changed: 24 additions & 22 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -141,6 +141,7 @@ datafusion-proto-common = { path = "datafusion/proto-common", version = "48.0.0"
 datafusion-session = { path = "datafusion/session", version = "48.0.0" }
 datafusion-spark = { path = "datafusion/spark", version = "48.0.0" }
 datafusion-sql = { path = "datafusion/sql", version = "48.0.0" }
+datafusion-substrait = { path = "datafusion/substrait", version = "48.0.0" }
 doc-comment = "0.3"
 env_logger = "0.11"
 futures = "0.3"

benchmarks/bench.sh

Lines changed: 28 additions & 5 deletions
@@ -78,8 +78,10 @@ venv: Creates new venv (unless already exists) and installs compare's
 **********
 all(default): Data/Run/Compare for all benchmarks
 tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, hash join
+tpch_csv: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single csv file per table, hash join
 tpch_mem: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), query from memory
 tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, hash join
+tpch_csv10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single csv file per table, hash join
 tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory
 cancellation: How long cancelling a query takes
 parquet: Benchmark of parquet reader's filtering speed
@@ -266,9 +268,11 @@ main() {
     mkdir -p "${DATA_DIR}"
     case "$BENCHMARK" in
         all)
-            run_tpch "1"
+            run_tpch "1" "parquet"
+            run_tpch "1" "csv"
             run_tpch_mem "1"
-            run_tpch "10"
+            run_tpch "10" "parquet"
+            run_tpch "10" "csv"
             run_tpch_mem "10"
             run_cancellation
             run_parquet
@@ -286,13 +290,19 @@ main() {
             run_external_aggr
             ;;
         tpch)
-            run_tpch "1"
+            run_tpch "1" "parquet"
+            ;;
+        tpch_csv)
+            run_tpch "1" "csv"
             ;;
         tpch_mem)
             run_tpch_mem "1"
             ;;
         tpch10)
-            run_tpch "10"
+            run_tpch "10" "parquet"
+            ;;
+        tpch_csv10)
+            run_tpch "10" "csv"
             ;;
         tpch_mem10)
             run_tpch_mem "10"
@@ -430,6 +440,17 @@ data_tpch() {
         $CARGO_COMMAND --bin tpch -- convert --input "${TPCH_DIR}" --output "${TPCH_DIR}" --format parquet
         popd > /dev/null
     fi
+
+    # Create 'csv' files from tbl
+    FILE="${TPCH_DIR}/csv/supplier"
+    if test -d "${FILE}"; then
+        echo " csv files exist ($FILE exists)."
+    else
+        echo " creating csv files using benchmark binary ..."
+        pushd "${SCRIPT_DIR}" > /dev/null
+        $CARGO_COMMAND --bin tpch -- convert --input "${TPCH_DIR}" --output "${TPCH_DIR}/csv" --format csv
+        popd > /dev/null
+    fi
 }

 # Runs the tpch benchmark
@@ -446,7 +467,9 @@ run_tpch() {
     echo "Running tpch benchmark..."
     # Optional query filter to run specific query
     QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
-    debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
+
+    FORMAT=$2
+    debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" $QUERY
 }

 # Runs the tpch in memory
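With tpch_csv and tpch_csv10 wired into main() and data_tpch() now converting the .tbl files into a csv/ subdirectory, the csv variants should be invocable like the existing parquet ones. A rough usage sketch, assuming bench.sh keeps its existing data/run subcommand interface:

    # generate .tbl, parquet, and the new csv copies of the SF1 tables
    ./bench.sh data tpch
    # run the csv-backed TPCH benchmark (SF1), then the SF10 variant
    ./bench.sh run tpch_csv
    ./bench.sh run tpch_csv10

Note that run_tpch now reads the file format from its second positional argument ($2), so any other callers of that function need to pass "parquet" or "csv" explicitly.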

benchmarks/src/tpch/run.rs

Lines changed: 1 addition & 1 deletion
@@ -274,7 +274,7 @@ impl RunOpt {
                 (Arc::new(format), path, ".tbl")
             }
             "csv" => {
-                let path = format!("{path}/{table}");
+                let path = format!("{path}/csv/{table}");
                 let format = CsvFormat::default()
                     .with_delimiter(b',')
                     .with_has_header(true);
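Combined with the bench.sh change above, the csv branch now resolves table paths under a csv/ subdirectory of the benchmark data directory instead of next to the .tbl sources. A sketch of the expected on-disk layout (the per-table names below are assumptions, not shown in this diff):

    # ${TPCH_DIR}/supplier.tbl       original .tbl sources
    # ${TPCH_DIR}/supplier           parquet output of the existing convert step
    # ${TPCH_DIR}/csv/supplier       csv output of the new convert step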

datafusion-cli/Cargo.toml

Lines changed: 2 additions & 1 deletion
@@ -37,7 +37,7 @@ backtrace = ["datafusion/backtrace"]
 [dependencies]
 arrow = { workspace = true }
 async-trait = { workspace = true }
-aws-config = "1.6.2"
+aws-config = "1.8.0"
 aws-credential-types = "1.2.0"
 clap = { version = "4.5.40", features = ["derive", "cargo"] }
 datafusion = { workspace = true, features = [
@@ -55,6 +55,7 @@ datafusion = { workspace = true, features = [
 dirs = "6.0.0"
 env_logger = { workspace = true }
 futures = { workspace = true }
+log = { workspace = true }
 mimalloc = { version = "0.1", default-features = false }
 object_store = { workspace = true, features = ["aws", "gcp", "http"] }
 parking_lot = { workspace = true }
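The new log dependency pairs with the env_logger entry already listed above. Assuming datafusion-cli initializes env_logger at startup (as its existing dependency suggests), records emitted through the log macros can be surfaced from the shell:

    # enable info-level log output from the CLI (env_logger's standard RUST_LOG filter)
    RUST_LOG=info datafusion-cli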
