Skip to content

Commit

Permalink
[fix](Nereids) column pruning under union broken unexpectedly (#26884)
Browse files Browse the repository at this point in the history
  • Loading branch information
morrySnow authored Nov 14, 2023
1 parent 0d9f486 commit be34030
Show file tree
Hide file tree
Showing 17 changed files with 504 additions and 343 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ public Plan visitLogicalUnion(LogicalUnion union, PruneContext context) {
}

LogicalUnion prunedOutputUnion = pruneOutput(union, union.getOutputs(), union::pruneOutputs, context);
if (prunedOutputUnion == union) {
return union;
}

// start prune children of union
List<Slot> originOutput = union.getOutput();
Expand Down
48 changes: 30 additions & 18 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,27 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------PhysicalProject
----------------------PhysicalOlapScan[store_sales]
--------------------PhysicalDistribute
----------------------filter((date_dim.d_year <= 2002)(date_dim.d_year >= 2000))
------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((date_dim.d_year <= 2002)(date_dim.d_year >= 2000))
--------------------------PhysicalOlapScan[date_dim]
--------------PhysicalDistribute
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
--------------------PhysicalProject
----------------------PhysicalOlapScan[catalog_sales]
--------------------PhysicalDistribute
----------------------filter((date_dim.d_year <= 2002)(date_dim.d_year >= 2000))
------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((date_dim.d_year <= 2002)(date_dim.d_year >= 2000))
--------------------------PhysicalOlapScan[date_dim]
--------------PhysicalDistribute
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------PhysicalProject
----------------------PhysicalOlapScan[web_sales]
--------------------PhysicalDistribute
----------------------filter((date_dim.d_year >= 2000)(date_dim.d_year <= 2002))
------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((date_dim.d_year >= 2000)(date_dim.d_year <= 2002))
--------------------------PhysicalOlapScan[date_dim]
----PhysicalResultSink
------PhysicalTopN
--------PhysicalDistribute
Expand All @@ -103,12 +106,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalOlapScan[store_sales]
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[store_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
--------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
--------------------------------------------PhysicalOlapScan[item]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
Expand All @@ -126,12 +132,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalOlapScan[catalog_sales]
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[catalog_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
--------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
--------------------------------------------PhysicalOlapScan[item]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
Expand All @@ -149,12 +158,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
----------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
--------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_year = 2002)(date_dim.d_moy = 11))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
--------------------------------------------PhysicalOlapScan[item]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalAssertNumRows
--------------------------------PhysicalDistribute
Expand Down
46 changes: 26 additions & 20 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,36 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalProject
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute
------------------------hashJoin[LEFT_SEMI_JOIN](catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)
--------------------------PhysicalDistribute
----------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
------------------------------PhysicalOlapScan[catalog_sales]
------------------------------PhysicalDistribute
--------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
------------------------PhysicalProject
--------------------------hashJoin[LEFT_SEMI_JOIN](catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)
----------------------------PhysicalDistribute
------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
--------------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
------------------PhysicalProject
--------------------hashJoin[RIGHT_SEMI_JOIN](web_sales.ws_item_sk = frequent_ss_items.item_sk)
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute
------------------------hashJoin[LEFT_SEMI_JOIN](web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)
--------------------------PhysicalDistribute
----------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
------------------------------PhysicalOlapScan[web_sales]
------------------------------PhysicalDistribute
--------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
------------------------PhysicalProject
--------------------------hashJoin[LEFT_SEMI_JOIN](web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)
----------------------------PhysicalDistribute
------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[web_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
--------------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 )

131 changes: 75 additions & 56 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query33.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,74 +9,93 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
------------------hashAgg[GLOBAL]
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((item.i_category = 'Home'))
--------------------------PhysicalOlapScan[item]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
------------------------------PhysicalDistribute
----------------------hashAgg[LOCAL]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](store_sales.ss_addr_sk = customer_address.ca_address_sk)
----------------------------------PhysicalDistribute
------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
--------------------------------------PhysicalOlapScan[store_sales]
--------------------------------------PhysicalDistribute
----------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store_sales]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
----------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------PhysicalDistribute
------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalDistribute
--------------------------------PhysicalOlapScan[item]
------------------------------------PhysicalProject
--------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
----------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------hashJoin[LEFT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[item]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((item.i_category = 'Home'))
--------------------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
------------------hashAgg[GLOBAL]
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((item.i_category = 'Home'))
--------------------------PhysicalOlapScan[item]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
------------------------------PhysicalDistribute
----------------------hashAgg[LOCAL]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_addr_sk = customer_address.ca_address_sk)
----------------------------------PhysicalDistribute
------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
--------------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------------PhysicalDistribute
----------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[catalog_sales]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
----------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------PhysicalDistribute
------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalDistribute
--------------------------------PhysicalOlapScan[item]
------------------------------------PhysicalProject
--------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
----------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------hashJoin[LEFT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[item]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((item.i_category = 'Home'))
--------------------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((item.i_category = 'Home'))
--------------------------PhysicalOlapScan[item]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
------------------hashAgg[LOCAL]
--------------------PhysicalProject
----------------------hashJoin[LEFT_SEMI_JOIN](item.i_manufact_id = item.i_manufact_id)
------------------------PhysicalDistribute
--------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[item]
------------------------------PhysicalDistribute
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)
----------------------------------PhysicalDistribute
------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------------------------PhysicalOlapScan[web_sales]
--------------------------------------PhysicalDistribute
----------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter((date_dim.d_moy = 1)(date_dim.d_year = 2002))
----------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------PhysicalDistribute
------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------------PhysicalProject
--------------------------------------filter((customer_address.ca_gmt_offset = -5.00))
----------------------------------------PhysicalOlapScan[customer_address]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------filter((item.i_category = 'Home'))
------------------------------PhysicalOlapScan[item]

Loading

0 comments on commit be34030

Please sign in to comment.