Skip to content

Commit

Permalink
[pick](nereids) adjust bc join and shuffle join #27113 (#27566)
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly authored Nov 25, 2023
1 parent 1b14da2 commit 24b16ad
Show file tree
Hide file tree
Showing 58 changed files with 99 additions and 2,586 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,15 @@ public Cost visitPhysicalHashJoin(
int parallelInstance = Math.max(1, ConnectContext.get().getSessionVariable().getParallelExecInstanceNum());
int totalInstanceNumber = parallelInstance * beNumber;
if (buildSideFactor <= 1.0) {
// use the square root of totalInstanceNumber (power of 0.5) as the default factor value
buildSideFactor = Math.pow(totalInstanceNumber, 0.5);
if (buildSideFactor <= 1.0) {
if (buildStats.computeSize() < 1024 * 1024) {
// no penalty to broadcast if build side is small
buildSideFactor = 1.0;
} else {
// use the square root of totalInstanceNumber (power of 0.5) as the default factor value
buildSideFactor = Math.pow(totalInstanceNumber, 0.5);
}
}
}
// TODO: since the output rows may expand a lot, a penalty on them would cause bc to never be chosen.
// will refine this in next generation cost model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ public double computeSize() {
}

public double dataSizeFactor() {
return computeTupleSize() / K_BYTES;
double lowerBound = 0.03;
double upperBound = 0.07;
return Math.min(Math.max(computeTupleSize() / K_BYTES, lowerBound), upperBound);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ PhysicalResultSink
------------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalDistribute
--------------------------------hashJoin[INNER_JOIN](c.c_current_addr_sk = ca.ca_address_sk)
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[customer]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer]
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------filter(ca_county IN ('Storey County', 'Marquette County', 'Warren County', 'Cochran County', 'Kandiyohi County'))
Expand Down
29 changes: 15 additions & 14 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--PhysicalCteProducer ( cteId=CTEId#0 )
----PhysicalProject
------filter((cnt > 4))
--------hashAgg[LOCAL]
----------PhysicalProject
------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
--------------PhysicalDistribute
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
--------------------PhysicalProject
----------------------PhysicalOlapScan[store_sales]
--------------------PhysicalDistribute
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------------PhysicalProject
------------------------filter(d_year IN (2000, 2001, 2002, 2003))
--------------------------PhysicalOlapScan[date_dim]
--------------PhysicalDistribute
----------------PhysicalProject
------------------PhysicalOlapScan[item]
------------------------PhysicalOlapScan[store_sales]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter(d_year IN (2000, 2001, 2002, 2003))
----------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------PhysicalOlapScan[item]
--PhysicalCteAnchor ( cteId=CTEId#2 )
----PhysicalCteProducer ( cteId=CTEId#2 )
------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,17 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------PhysicalDistribute
----------PhysicalTopN
------------PhysicalProject
--------------hashJoin[INNER_JOIN](v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_store_name = v1_lead.s_store_name)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1))
--------------hashJoin[INNER_JOIN](v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_store_name = v1_lag.s_store_name)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1))
----------------PhysicalDistribute
------------------PhysicalProject
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------PhysicalDistribute
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_store_name = v1_lag.s_store_name)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1))
----------------------PhysicalDistribute
------------------------PhysicalProject
------------------hashJoin[INNER_JOIN](v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_store_name = v1_lead.s_store_name)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1))
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((if((avg_monthly_sales > 0.0000), (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)), NULL) > 0.1)(v2.d_year = 2001)(v2.avg_monthly_sales > 0.0000))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter((if((avg_monthly_sales > 0.0000), (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)), NULL) > 0.1)(v2.d_year = 2001)(v2.avg_monthly_sales > 0.0000))
----------------------------PhysicalCteConsumer ( cteId=CTEId#0 )

47 changes: 24 additions & 23 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query54.out
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,33 @@ PhysicalResultSink
------------------------------------------------------PhysicalOlapScan[customer_address]
----------------------------------------------------PhysicalDistribute
------------------------------------------------------PhysicalProject
--------------------------------------------------------hashAgg[LOCAL]
----------------------------------------------------------PhysicalProject
------------------------------------------------------------hashJoin[INNER_JOIN](customer.c_customer_sk = cs_or_ws_sales.customer_sk)
--------------------------------------------------------------PhysicalDistribute
----------------------------------------------------------------PhysicalProject
------------------------------------------------------------------PhysicalOlapScan[customer]
--------------------------------------------------------------PhysicalDistribute
----------------------------------------------------------------PhysicalProject
------------------------------------------------------------------hashJoin[INNER_JOIN](cs_or_ws_sales.sold_date_sk = date_dim.d_date_sk)
--------------------------------------------------------hashAgg[GLOBAL]
----------------------------------------------------------PhysicalDistribute
------------------------------------------------------------hashAgg[LOCAL]
--------------------------------------------------------------PhysicalProject
----------------------------------------------------------------hashJoin[INNER_JOIN](customer.c_customer_sk = cs_or_ws_sales.customer_sk)
------------------------------------------------------------------PhysicalProject
--------------------------------------------------------------------PhysicalOlapScan[customer]
------------------------------------------------------------------PhysicalDistribute
--------------------------------------------------------------------PhysicalProject
----------------------------------------------------------------------hashJoin[INNER_JOIN](cs_or_ws_sales.item_sk = item.i_item_sk)
------------------------------------------------------------------------PhysicalUnion
--------------------------------------------------------------------------PhysicalDistribute
----------------------------------------------------------------------------PhysicalProject
------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------------------------------------------------PhysicalDistribute
----------------------------------------------------------------------------PhysicalProject
------------------------------------------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------------------------------------hashJoin[INNER_JOIN](cs_or_ws_sales.sold_date_sk = date_dim.d_date_sk)
------------------------------------------------------------------------PhysicalProject
--------------------------------------------------------------------------hashJoin[INNER_JOIN](cs_or_ws_sales.item_sk = item.i_item_sk)
----------------------------------------------------------------------------PhysicalUnion
------------------------------------------------------------------------------PhysicalDistribute
--------------------------------------------------------------------------------PhysicalProject
----------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales]
------------------------------------------------------------------------------PhysicalDistribute
--------------------------------------------------------------------------------PhysicalProject
----------------------------------------------------------------------------------PhysicalOlapScan[web_sales]
----------------------------------------------------------------------------PhysicalDistribute
------------------------------------------------------------------------------PhysicalProject
--------------------------------------------------------------------------------filter((item.i_class = 'maternity')(item.i_category = 'Women'))
----------------------------------------------------------------------------------PhysicalOlapScan[item]
------------------------------------------------------------------------PhysicalDistribute
--------------------------------------------------------------------------PhysicalProject
----------------------------------------------------------------------------filter((item.i_class = 'maternity')(item.i_category = 'Women'))
------------------------------------------------------------------------------PhysicalOlapScan[item]
--------------------------------------------------------------------PhysicalDistribute
----------------------------------------------------------------------PhysicalProject
------------------------------------------------------------------------filter((date_dim.d_year = 1998)(date_dim.d_moy = 5))
--------------------------------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------------------------------------------filter((date_dim.d_year = 1998)(date_dim.d_moy = 5))
------------------------------------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------PhysicalDistribute
--------------------------------------------------PhysicalProject
----------------------------------------------------PhysicalOlapScan[store]
Expand Down
22 changes: 11 additions & 11 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query76.out
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ PhysicalResultSink
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------PhysicalOlapScan[date_dim]
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
------------------------PhysicalDistribute
----------------PhysicalDistribute
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
--------------------------PhysicalProject
----------------------------filter(cs_warehouse_sk IS NULL)
------------------------------PhysicalOlapScan[catalog_sales]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[item]
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[item]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------PhysicalOlapScan[date_dim]

29 changes: 15 additions & 14 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query77.out
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,22 @@ PhysicalResultSink
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
------------------------PhysicalProject
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](store_returns.sr_store_sk = store.s_store_sk)
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = date_dim.d_date_sk)
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_returns]
--------------------------------------PhysicalDistribute
--------------------------hashAgg[GLOBAL]
----------------------------PhysicalDistribute
------------------------------hashAgg[LOCAL]
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN](store_returns.sr_store_sk = store.s_store_sk)
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = date_dim.d_date_sk)
----------------------------------------PhysicalProject
------------------------------------------filter((date_dim.d_date <= '1998-09-04')(date_dim.d_date >= '1998-08-05'))
--------------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store]
------------------------------------------PhysicalOlapScan[store_returns]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter((date_dim.d_date <= '1998-09-04')(date_dim.d_date >= '1998-08-05'))
----------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------PhysicalDistribute
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
--------------------PhysicalProject
----------------------NestedLoopJoin[CROSS_JOIN]
------------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ PhysicalResultSink
------------------------hashJoin[INNER_JOIN](catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](catalog_returns.cr_returning_customer_sk = customer.c_customer_sk)
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_returns]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_returns]
------------------------------PhysicalDistribute
--------------------------------hashJoin[INNER_JOIN](customer_address.ca_address_sk = customer.c_current_addr_sk)
----------------------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@ PhysicalResultSink
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = web_sales.ws_sold_date_sk)
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](item.i_item_sk = web_sales.ws_item_sk)
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](item.i_item_sk = web_sales.ws_item_sk)
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[web_sales]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[web_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((item.i_manufact_id = 320))
--------------------------------------PhysicalOlapScan[item]
----------------------------------filter((item.i_manufact_id = 320))
------------------------------------PhysicalOlapScan[item]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------filter((date_dim.d_date <= '2002-05-27')(date_dim.d_date >= '2002-02-26'))
Expand Down
13 changes: 0 additions & 13 deletions regression-test/data/nereids_tpch_shape_sf500_p0/shape/q1.out

This file was deleted.

29 changes: 0 additions & 29 deletions regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out

This file was deleted.

Loading

0 comments on commit 24b16ad

Please sign in to comment.