Skip to content

Commit

Permalink
feat: add rule to merge projection. (apache#5349)
Browse files Browse the repository at this point in the history
  • Loading branch information
jackwener authored and jiangzhx committed Feb 24, 2023
1 parent 92dfceb commit e569fef
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 87 deletions.
19 changes: 8 additions & 11 deletions benchmarks/expected-plans/q15.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,16 @@ Sort: supplier.s_suppkey ASC NULLS LAST
Inner Join: supplier.s_suppkey = revenue0.supplier_no
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
SubqueryAlias: revenue0
Projection: supplier_no, total_revenue
Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
SubqueryAlias: __scalar_sq_1
Projection: MAX(revenue0.total_revenue) AS __value
Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]]
SubqueryAlias: revenue0
Projection: total_revenue
Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
EmptyRelation
25 changes: 12 additions & 13 deletions benchmarks/expected-plans/q16.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST
Projection: part.p_brand, part.p_type, part.p_size, COUNT(DISTINCT partsupp.ps_suppkey) AS supplier_cnt
Projection: group_alias_0 AS part.p_brand, group_alias_1 AS part.p_type, group_alias_2 AS part.p_size, COUNT(alias1) AS COUNT(DISTINCT partsupp.ps_suppkey)
Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]]
Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]]
LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey
Inner Join: partsupp.ps_partkey = part.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey]
Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
SubqueryAlias: __correlated_sq_1
Projection: supplier.s_suppkey AS s_suppkey
Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
TableScan: supplier projection=[s_suppkey, s_comment]
Projection: group_alias_0 AS part.p_brand, group_alias_1 AS part.p_type, group_alias_2 AS part.p_size, COUNT(alias1) AS supplier_cnt
Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]]
Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]]
LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey
Inner Join: partsupp.ps_partkey = part.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey]
Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
SubqueryAlias: __correlated_sq_1
Projection: supplier.s_suppkey AS s_suppkey
Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
TableScan: supplier projection=[s_suppkey, s_comment]
47 changes: 23 additions & 24 deletions benchmarks/expected-plans/q2.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = partsupp.ps_partkey
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size]
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
SubqueryAlias: __scalar_sq_1
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: supplier projection=[s_suppkey, s_nationkey]
TableScan: nation projection=[n_nationkey, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = partsupp.ps_partkey
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size]
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
SubqueryAlias: __scalar_sq_1
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: supplier projection=[s_suppkey, s_nationkey]
TableScan: nation projection=[n_nationkey, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
41 changes: 20 additions & 21 deletions benchmarks/expected-plans/q8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,23 @@ Sort: all_nations.o_year ASC NULLS LAST
Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]]
SubqueryAlias: all_nations
Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation
Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n2.n_name
Inner Join: n1.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = n2.n_nationkey
Inner Join: customer.c_nationkey = n1.n_nationkey
Inner Join: orders.o_custkey = customer.c_custkey
Inner Join: lineitem.l_orderkey = orders.o_orderkey
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = lineitem.l_partkey
Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
TableScan: part projection=[p_partkey, p_type]
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
TableScan: supplier projection=[s_suppkey, s_nationkey]
Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861")
TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
TableScan: customer projection=[c_custkey, c_nationkey]
SubqueryAlias: n1
TableScan: nation projection=[n_nationkey, n_regionkey]
SubqueryAlias: n2
TableScan: nation projection=[n_nationkey, n_name]
Filter: region.r_name = Utf8("AMERICA")
TableScan: region projection=[r_regionkey, r_name]
Inner Join: n1.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = n2.n_nationkey
Inner Join: customer.c_nationkey = n1.n_nationkey
Inner Join: orders.o_custkey = customer.c_custkey
Inner Join: lineitem.l_orderkey = orders.o_orderkey
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = lineitem.l_partkey
Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
TableScan: part projection=[p_partkey, p_type]
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
TableScan: supplier projection=[s_suppkey, s_nationkey]
Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861")
TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
TableScan: customer projection=[c_custkey, c_nationkey]
SubqueryAlias: n1
TableScan: nation projection=[n_nationkey, n_regionkey]
SubqueryAlias: n2
TableScan: nation projection=[n_nationkey, n_name]
Filter: region.r_name = Utf8("AMERICA")
TableScan: region projection=[r_regionkey, r_name]
25 changes: 12 additions & 13 deletions benchmarks/expected-plans/q9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST
Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]]
SubqueryAlias: profit
Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(38, 4)) AS amount
Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: lineitem.l_orderkey = orders.o_orderkey
Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = lineitem.l_partkey
Filter: part.p_name LIKE Utf8("%green%")
TableScan: part projection=[p_partkey, p_name]
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount]
TableScan: supplier projection=[s_suppkey, s_nationkey]
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: orders projection=[o_orderkey, o_orderdate]
TableScan: nation projection=[n_nationkey, n_name]
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: lineitem.l_orderkey = orders.o_orderkey
Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
Inner Join: part.p_partkey = lineitem.l_partkey
Filter: part.p_name LIKE Utf8("%green%")
TableScan: part projection=[p_partkey, p_name]
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount]
TableScan: supplier projection=[s_suppkey, s_nationkey]
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
TableScan: orders projection=[o_orderkey, o_orderdate]
TableScan: nation projection=[n_nationkey, n_name]
1 change: 1 addition & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub mod eliminate_outer_join;
pub mod extract_equijoin_predicate;
pub mod filter_null_join_keys;
pub mod inline_table_scan;
pub mod merge_projection;
pub mod optimizer;
pub mod propagate_empty_relation;
pub mod push_down_filter;
Expand Down
Loading

0 comments on commit e569fef

Please sign in to comment.