Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datafusion/src/optimizer/filter_push_down.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ fn split_members<'a>(predicate: &'a Expr, predicates: &mut Vec<&'a Expr>) {
split_members(left, predicates);
split_members(right, predicates);
}
Expr::Alias(expr, _) => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯 excellent

split_members(expr, predicates);
}
other => predicates.push(other),
}
}
Expand Down Expand Up @@ -308,6 +311,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
}
Ok(())
})?;

// Predicates without columns will not be pushed down.
// As those contain only literals, they could be optimized using constant folding
// and removal of WHERE TRUE / WHERE FALSE
Expand Down
129 changes: 129 additions & 0 deletions datafusion/tests/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3543,6 +3543,135 @@ async fn explain_analyze_runs_optimizers() {
assert_contains!(actual, expected);
}

/// Checks the optimized logical plan produced for the `q10` query below.
///
/// The test registers the `customer`, `orders`, `lineitem` and `nation` CSV
/// tables, plans the SQL text, runs the optimizer, and compares the `Debug`
/// rendering of the resulting plan with a hard-coded expected string.
///
/// In the expected plan, the date-range predicates on `orders` and the
/// `l_returnflag` predicate on `lineitem` appear both as `Filter` nodes and in
/// the `filters=[...]` list of the corresponding `TableScan`.
///
/// # Panics
///
/// `unwrap()` is used on the planning and optimization results, so a failure
/// there panics instead of propagating through the `Result` return value.
#[tokio::test]
async fn tpch_explain_q10() -> Result<()> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice 👍
Maybe it would be good to have those in the tpch crate instead?
In that case we could include some / all other queries as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree -- filed #1377 to track this

let mut ctx = ExecutionContext::new();

// Register every table referenced in the query's FROM clause.
register_tpch_csv(&mut ctx, "customer").await?;
register_tpch_csv(&mut ctx, "orders").await?;
register_tpch_csv(&mut ctx, "lineitem").await?;
register_tpch_csv(&mut ctx, "nation").await?;

let sql = "select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
from
customer,
orders,
lineitem,
nation
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate >= date '1993-10-01'
and o_orderdate < date '1994-01-01'
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
order by
revenue desc;";

// Build the logical plan, then replace it with the optimizer's output.
let mut plan = ctx.create_logical_plan(sql);
plan = ctx.optimize(&plan.unwrap());

// Expected `Debug` text of the optimized plan; compared verbatim below.
let expected = "\
Sort: #revenue DESC NULLS FIRST\
\n Projection: #customer.c_custkey, #customer.c_name, #SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, #customer.c_acctbal, #nation.n_name, #customer.c_address, #customer.c_phone, #customer.c_comment\
\n Aggregate: groupBy=[[#customer.c_custkey, #customer.c_name, #customer.c_acctbal, #customer.c_phone, #nation.n_name, #customer.c_address, #customer.c_comment]], aggr=[[SUM(#lineitem.l_extendedprice * Int64(1) - #lineitem.l_discount)]]\
\n Join: #customer.c_nationkey = #nation.n_nationkey\
\n Join: #orders.o_orderkey = #lineitem.l_orderkey\
\n Join: #customer.c_custkey = #orders.o_custkey\
\n TableScan: customer projection=Some([0, 1, 2, 3, 4, 5, 7])\
\n Filter: #orders.o_orderdate >= Date32(\"8674\") AND #orders.o_orderdate < Date32(\"8766\")\
\n TableScan: orders projection=Some([0, 1, 4]), filters=[#orders.o_orderdate >= Date32(\"8674\"), #orders.o_orderdate < Date32(\"8766\")]\
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so lovely to see those filters pushed down ❤️

\n Filter: #lineitem.l_returnflag = Utf8(\"R\")\
\n TableScan: lineitem projection=Some([0, 5, 6, 8]), filters=[#lineitem.l_returnflag = Utf8(\"R\")]\
\n TableScan: nation projection=Some([0, 1])";
// The comparison is on the plan's `Debug` formatting, so any change in plan
// shape or display text fails this assertion.
assert_eq!(format!("{:?}", plan.unwrap()), expected);

Ok(())
}

fn get_tpch_table_schema(table: &str) -> Schema {
match table {
"customer" => Schema::new(vec![
Field::new("c_custkey", DataType::Int64, false),
Field::new("c_name", DataType::Utf8, false),
Field::new("c_address", DataType::Utf8, false),
Field::new("c_nationkey", DataType::Int64, false),
Field::new("c_phone", DataType::Utf8, false),
Field::new("c_acctbal", DataType::Float64, false),
Field::new("c_mktsegment", DataType::Utf8, false),
Field::new("c_comment", DataType::Utf8, false),
]),

"orders" => Schema::new(vec![
Field::new("o_orderkey", DataType::Int64, false),
Field::new("o_custkey", DataType::Int64, false),
Field::new("o_orderstatus", DataType::Utf8, false),
Field::new("o_totalprice", DataType::Float64, false),
Field::new("o_orderdate", DataType::Date32, false),
Field::new("o_orderpriority", DataType::Utf8, false),
Field::new("o_clerk", DataType::Utf8, false),
Field::new("o_shippriority", DataType::Int32, false),
Field::new("o_comment", DataType::Utf8, false),
]),

"lineitem" => Schema::new(vec![
Field::new("l_orderkey", DataType::Int64, false),
Field::new("l_partkey", DataType::Int64, false),
Field::new("l_suppkey", DataType::Int64, false),
Field::new("l_linenumber", DataType::Int32, false),
Field::new("l_quantity", DataType::Float64, false),
Field::new("l_extendedprice", DataType::Float64, false),
Field::new("l_discount", DataType::Float64, false),
Field::new("l_tax", DataType::Float64, false),
Field::new("l_returnflag", DataType::Utf8, false),
Field::new("l_linestatus", DataType::Utf8, false),
Field::new("l_shipdate", DataType::Date32, false),
Field::new("l_commitdate", DataType::Date32, false),
Field::new("l_receiptdate", DataType::Date32, false),
Field::new("l_shipinstruct", DataType::Utf8, false),
Field::new("l_shipmode", DataType::Utf8, false),
Field::new("l_comment", DataType::Utf8, false),
]),

"nation" => Schema::new(vec![
Field::new("n_nationkey", DataType::Int64, false),
Field::new("n_name", DataType::Utf8, false),
Field::new("n_regionkey", DataType::Int64, false),
Field::new("n_comment", DataType::Utf8, false),
]),

_ => unimplemented!(),
}
}

/// Registers one TPC-H CSV test table with the given execution context.
///
/// The file is read from `tests/tpch-csv/<table>.csv` using the explicit
/// schema returned by `get_tpch_table_schema`, and the table is registered
/// under the name `table`.
///
/// # Errors
///
/// Propagates any error returned by `register_csv`.
async fn register_tpch_csv(ctx: &mut ExecutionContext, table: &str) -> Result<()> {
    let table_schema = get_tpch_table_schema(table);

    // Build the arguments up front so the registration call reads as one line.
    let csv_path = format!("tests/tpch-csv/{}.csv", table);
    let read_options = CsvReadOptions::new().schema(&table_schema);

    ctx.register_csv(table, csv_path.as_str(), read_options)
        .await?;
    Ok(())
}

async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) {
let testdata = datafusion::test_util::arrow_test_data();

Expand Down
10 changes: 10 additions & 0 deletions datafusion/tests/tpch-csv/customer.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
c_custkey,c_name,c_address,c_nationkey,c_phone,c_acctbal,c_mktsegment,c_comment
2,Customer#000000002,"XSTf4,NCwDVaWNe6tEgvwfmRchLXak",13,23-768-687-3665,121.65,AUTOMOBILE,l accounts. blithely ironic theodolites integrate boldly: caref
3,Customer#000000003,MG9kdTD2WBHm,1,11-719-748-3364,7498.12,AUTOMOBILE," deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov"
4,Customer#000000004,XxVSJsLAGtn,4,14-128-190-5944,2866.83,MACHINERY," requests. final, regular ideas sleep final accou"
5,Customer#000000005,KvpyuHCplrB84WgAiGV6sYpZq7Tj,3,13-750-942-6364,794.47,HOUSEHOLD,n accounts will have to unwind. foxes cajole accor
6,Customer#000000006,"sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn",20,30-114-968-4951,7638.57,AUTOMOBILE,tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious
7,Customer#000000007,TcGe5gaZNgVePxU5kRrvXBfkasDTea,18,28-190-982-9759,9561.95,AUTOMOBILE,"ainst the ironic, express theodolites. express, even pinto beans among the exp"
8,Customer#000000008,"I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5",17,27-147-574-9335,6819.74,BUILDING,among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide
9,Customer#000000009,xKiAFTjUsCuxfeleNqefumTrjS,8,18-338-906-3675,8324.07,FURNITURE,r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl
10,Customer#000000010,6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2,5,15-741-346-9870,2753.54,HOUSEHOLD,es regular deposits haggle. fur
10 changes: 10 additions & 0 deletions datafusion/tests/tpch-csv/lineitem.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment
1,67310,7311,2,36.0,45983.16,0.09,0.06,N,O,1996-04-12,1996-02-28,1996-04-20,TAKE BACK RETURN,MAIL,ly final dependencies: slyly bold
1,63700,3701,3,8.0,13309.6,0.1,0.02,N,O,1996-01-29,1996-03-05,1996-01-31,TAKE BACK RETURN,REG AIR,"riously. regular, express dep"
1,2132,4633,4,28.0,28955.64,0.09,0.06,N,O,1996-04-21,1996-03-30,1996-05-16,NONE,AIR,lites. fluffily even de
1,24027,1534,5,24.0,22824.48,0.1,0.04,N,O,1996-03-30,1996-03-14,1996-04-01,NONE,FOB, pending foxes. slyly re
1,15635,638,6,32.0,49620.16,0.07,0.02,N,O,1996-01-30,1996-02-07,1996-02-03,DELIVER IN PERSON,MAIL,arefully slyly ex
2,106170,1191,1,38.0,44694.46,0.0,0.05,N,O,1997-01-28,1997-01-14,1997-02-02,TAKE BACK RETURN,RAIL,ven requests. deposits breach a
3,4297,1798,1,45.0,54058.05,0.06,0.0,R,F,1994-02-02,1994-01-04,1994-02-23,NONE,AIR,ongside of the furiously brave acco
3,19036,6540,2,49.0,46796.47,0.1,0.0,R,F,1993-11-09,1993-12-20,1993-11-24,TAKE BACK RETURN,RAIL, unusual accounts. eve
3,128449,3474,3,27.0,39890.88,0.06,0.07,A,F,1994-01-16,1993-11-22,1994-01-23,DELIVER IN PERSON,SHIP,nal foxes wake.
11 changes: 11 additions & 0 deletions datafusion/tests/tpch-csv/nation.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
n_nationkey,n_name,n_regionkey,n_comment
1,ARGENTINA,1,al foxes promise slyly according to the regular accounts. bold requests alon
2,BRAZIL,1,y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
3,CANADA,1,"eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold"
4,EGYPT,4,y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
5,ETHIOPIA,0,ven packages wake quickly. regu
6,FRANCE,3,"refully final requests. regular, ironi"
7,GERMANY,3,"l platelets. regular accounts x-ray: unusual, regular acco"
8,INDIA,2,ss excuses cajole slyly across the packages. deposits print aroun
9,INDONESIA,2, slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
10,IRAN,4,efully alongside of the slyly final dependencies.
11 changes: 11 additions & 0 deletions datafusion/tests/tpch-csv/orders.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
o_orderkey,o_custkey,o_orderstatus,o_totalprice,o_orderdate,o_orderpriority,o_clerk,o_shippriority,o_comment
2,78002,O,46929.18,1996-12-01,1-URGENT,Clerk#000000880,0," foxes. pending accounts at the pending, silent asymptot"
3,123314,F,193846.25,1993-10-14,5-LOW,Clerk#000000955,0,sly final accounts boost. carefully regular ideas cajole carefully. depos
4,136777,O,32151.78,1995-10-11,5-LOW,Clerk#000000124,0,"sits. slyly regular warthogs cajole. regular, regular theodolites acro"
5,44485,F,144659.2,1994-07-30,5-LOW,Clerk#000000925,0,quickly. bold deposits sleep slyly. packages use slyly
6,55624,F,58749.59,1992-02-21,4-NOT SPECIFIED,Clerk#000000058,0,"ggle. special, final requests are against the furiously specia"
7,39136,O,252004.18,1996-01-10,2-HIGH,Clerk#000000470,0,ly special requests
32,130057,O,208660.75,1995-07-16,2-HIGH,Clerk#000000616,0,"ise blithely bold, regular requests. quickly unusual dep"
33,66958,F,163243.98,1993-10-27,3-MEDIUM,Clerk#000000409,0,uriously. furiously final request
34,61001,O,58949.67,1998-07-21,3-MEDIUM,Clerk#000000223,0,ly final packages. fluffily final deposits wake blithely ideas. spe
35,127588,O,253724.56,1995-10-23,4-NOT SPECIFIED,Clerk#000000259,0,zzle. carefully enticing deposits nag furio