Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move subquery alias assignment onto rules #4767

Merged
merged 1 commit into from
Dec 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q11.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Sort: value DESC NULLS FIRST
Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value
Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__sq_1.__value AS Decimal128(38, 15))
Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15))
CrossJoin:
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
Inner Join: supplier.s_nationkey = nation.n_nationkey
Expand All @@ -9,7 +9,7 @@ Sort: value DESC NULLS FIRST
TableScan: supplier projection=[s_suppkey, s_nationkey]
Filter: nation.n_name = Utf8("GERMANY")
TableScan: nation projection=[n_nationkey, n_name]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
Inner Join: supplier.s_nationkey = nation.n_nationkey
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q15.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
EmptyRelation
Sort: supplier.s_suppkey ASC NULLS LAST
Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue
Inner Join: revenue0.total_revenue = __sq_1.__value
Inner Join: revenue0.total_revenue = __scalar_sq_1.__value
Inner Join: supplier.s_suppkey = revenue0.supplier_no
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
SubqueryAlias: revenue0
Expand All @@ -10,7 +10,7 @@ Sort: supplier.s_suppkey ASC NULLS LAST
Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587")
TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: MAX(revenue0.total_revenue) AS __value
Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]]
SubqueryAlias: revenue0
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type AS
Projection: group_alias_0 AS part.p_brand, group_alias_1 AS part.p_type, group_alias_2 AS part.p_size, COUNT(alias1) AS COUNT(DISTINCT partsupp.ps_suppkey)
Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]]
Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]]
LeftAnti Join: partsupp.ps_suppkey = __sq_1.s_suppkey
LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey
Inner Join: partsupp.ps_partkey = part.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey]
Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: supplier.s_suppkey AS s_suppkey
Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
TableScan: supplier projection=[s_suppkey, s_comment]
13 changes: 7 additions & 6 deletions benchmarks/expected-plans/q17.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
Projection: CAST(SUM(lineitem.l_extendedprice) AS Decimal128(38, 33)) / Decimal128(Some(7000000000000000195487369212723200),38,33) AS avg_yearly
Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly
Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
Filter: CAST(lineitem.l_quantity AS Decimal128(38, 21)) < __sq_1.__value
Inner Join: part.p_partkey = __sq_1.l_partkey
Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15))
Inner Join: part.p_partkey = __scalar_sq_1.l_partkey, lineitem.l_partkey = __scalar_sq_1.l_partkey
Inner Join: lineitem.l_partkey = part.p_partkey
TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX")
TableScan: part projection=[p_partkey, p_brand, p_container]
Projection: lineitem.l_partkey, Decimal128(Some(200000000000000000000),38,21) * CAST(AVG(lineitem.l_quantity) AS Decimal128(38, 21)) AS __value, alias=__sq_1
Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
SubqueryAlias: __scalar_sq_1
Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value
Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
TableScan: lineitem projection=[l_partkey, l_quantity]
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q18.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
Projection: customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, SUM(lineitem.l_quantity)
Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[SUM(lineitem.l_quantity)]]
LeftSemi Join: orders.o_orderkey = __sq_1.l_orderkey
LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey
Inner Join: orders.o_orderkey = lineitem.l_orderkey
Inner Join: customer.c_custkey = orders.o_custkey
TableScan: customer projection=[c_custkey, c_name]
TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate]
TableScan: lineitem projection=[l_orderkey, l_quantity]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: lineitem.l_orderkey AS l_orderkey
Filter: SUM(lineitem.l_quantity) > Decimal128(Some(30000),25,2)
Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_quantity)]]
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
Inner Join: nation.n_regionkey = region.r_regionkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
Expand All @@ -13,7 +13,7 @@ Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplie
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
Filter: region.r_name = Utf8("EUROPE")
TableScan: region projection=[r_regionkey, r_name]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
Inner Join: nation.n_regionkey = region.r_regionkey
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/expected-plans/q20.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
Sort: supplier.s_name ASC NULLS LAST
Projection: supplier.s_name, supplier.s_address
LeftSemi Join: supplier.s_suppkey = __sq_1.ps_suppkey
LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey
Inner Join: supplier.s_nationkey = nation.n_nationkey
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey]
Filter: nation.n_name = Utf8("CANADA")
TableScan: nation projection=[n_nationkey, n_name]
SubqueryAlias: __sq_1
SubqueryAlias: __correlated_sq_1
Projection: partsupp.ps_suppkey AS ps_suppkey
Filter: CAST(partsupp.ps_availqty AS Float64) > __sq_3.__value
Inner Join: partsupp.ps_partkey = __sq_3.l_partkey, partsupp.ps_suppkey = __sq_3.l_suppkey
LeftSemi Join: partsupp.ps_partkey = __sq_2.p_partkey
Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value
Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey
LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty]
SubqueryAlias: __sq_2
SubqueryAlias: __correlated_sq_2
Projection: part.p_partkey AS p_partkey
Filter: part.p_name LIKE Utf8("forest%")
TableScan: part projection=[p_partkey, p_name]
SubqueryAlias: __sq_3
SubqueryAlias: __scalar_sq_1
Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value
Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]]
Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131")
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/expected-plans/q22.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ Sort: custsale.cntrycode ASC NULLS LAST
Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]]
SubqueryAlias: custsale
Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal
Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __sq_1.__value
Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.__value
CrossJoin:
LeftAnti Join: customer.c_custkey = orders.o_custkey
Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
TableScan: customer projection=[c_custkey, c_phone, c_acctbal]
TableScan: orders projection=[o_custkey]
SubqueryAlias: __sq_1
SubqueryAlias: __scalar_sq_1
Projection: AVG(customer.c_acctbal) AS __value
Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
Expand Down
12 changes: 1 addition & 11 deletions benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,17 +596,7 @@ mod tests {
expected_plan(16).await
}

/// This query produces different plans depending on operating system. The difference is
/// due to re-writing the following expression:
///
/// `sum(l_extendedprice) / 7.0 as avg_yearly`
///
/// Linux: Decimal128(Some(7000000000000000195487369212723200),38,33)
/// Windows: Decimal128(Some(6999999999999999042565864605876224),38,33)
///
/// See https://github.com/apache/arrow-datafusion/issues/3791
#[tokio::test]
#[ignore]
#[tokio::test]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was fixed by #4038

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me.

/// Runs the shared `expected_plan` check for TPC-H query 17.
///
/// This test used to be `#[ignore]`d because the rewrite of
/// `sum(l_extendedprice) / 7.0 as avg_yearly` produced a different decimal
/// literal on Linux and Windows (apache/arrow-datafusion#3791); that is
/// reported as fixed by #4038, so the test is enabled again.
///
/// NOTE(review): presumably compares the generated plan with
/// `benchmarks/expected-plans/q17.txt` — confirm in `expected_plan`.
async fn q17_expected_plan() -> Result<()> {
expected_plan(17).await
}
Expand Down
Loading