From 2d7249c72a978075e9bfc6ca84994cd99ec0fc86 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Wed, 20 Aug 2025 10:20:40 +0800 Subject: [PATCH 1/4] [opt](nereids) optimize normalize window (#54947) close #54577 Optimize normalize window: for an expression that does not contain a WindowFunction, only push down the slots it uses to the bottom projects, so that more filters can be pushed down through the window. For this sql: ```sql select SUBSTR(orderdate,1,10) AS dt, ROW_NUMBER() OVER(PARTITION BY orderdate ORDER BY orderid DESC) AS rn from lineorders having dt = '2025-01-01' ``` we do not push down the `dt` slot under LogicalWindow, but push down [orderdate, orderid] to the bottom projects, because if we pushed down `dt`, the plan tree would be: ``` LogicalFilter(dt#3 = '2025-01-01') | LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) | LogicalProject(orderid#1, orderdate#2, substr(orderdate#2, 1, 10) as dt#3) ``` and the filter could not be pushed down by `PushDownFilterThroughWindow`, causing inefficiency, because dt#3 in the LogicalFilter is not contained in the partition key of the LogicalWindow: [orderdate#2]. 
so we only push down the `orderdate` slot used by the LogicalFilter, and do not push down `dt`: ``` LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01') | LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) | LogicalProject(orderid#1, orderdate#2) ``` and then `PushDownFilterThroughWindow` finds that the LogicalFilter's `orderdate#2` is contained in the LogicalWindow's partition key: [orderdate#2], and can push the filter down to: ``` LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) | LogicalProject(orderid#1, orderdate#2) | LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01') ``` (cherry picked from commit 0e669b97c6b70158ae75147a525c1f8ee5849df2) --- .../ExtractAndNormalizeWindowExpression.java | 39 +++++++++++ .../PushDownFilterThroughWindowTest.java | 64 ++++++++++++++++++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java index 0216570956418c..e96cf3e105c9cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java @@ -180,6 +180,45 @@ private Set collectExpressionsToBePushedDown(List e inputSlots.stream() ).distinct(); } + + // for this sql: + // select + // SUBSTR(orderdate,1,10) AS dt, + // ROW_NUMBER() OVER(PARTITION BY orderdate ORDER BY orderid DESC) AS rn + // from lineorders + // having dt = '2025-01-01' + // + // we do not push down the `dt` slot under LogicalWindow, but push down [orderdate, orderid] + // to the bottom projects, because if we pushed down `dt`, the plan tree would be: + // + // LogicalFilter(dt#3 = '2025-01-01') + // | + // LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) + // | + // 
LogicalProject(orderid#1, orderdate#2, substr(orderdate#2, 1, 10) as dt#3) + // + // and the filter can not be pushed down by `PushDownFilterThroughWindow`, causing inefficiency, + // because dt#3 in LogicalFilter is not in the partition key of LogicalWindow: [orderdate#2]. + // + // so we only push down orderdate used by the LogicalFilter, and do not push down `dt`: + // + // LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01') + // | + // LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) + // | + // LogicalProject(orderid#1, orderdate#2) + // + // and then, `PushDownFilterThroughWindow` finds that the LogicalFilter's `orderdate#2` is + // in the LogicalWindow's partition key: [orderdate#2], and can push the filter down to: + // + // LogicalWindow(rowNumber(partition by orderdate#2, order by orderid#1)) + // | + // LogicalProject(orderid#1, orderdate#2) + // | + // LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01') + if (expression instanceof Alias) { + return expression.getInputSlots().stream(); + } return ImmutableList.of(expression).stream(); }) .collect(ImmutableSet.toImmutableSet()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java index 6edcd01a8ea595..3c367550cfe543 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; @@ -35,6 +36,7 @@ import org.apache.doris.nereids.util.PlanChecker; import 
org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.utframe.TestWithFeService; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -42,7 +44,7 @@ import java.util.List; -class PushDownFilterThroughWindowTest implements MemoPatternMatchSupported { +class PushDownFilterThroughWindowTest extends TestWithFeService implements MemoPatternMatchSupported { private final LogicalOlapScan scan = new LogicalOlapScan(StatementScopeIdGenerator.newRelationId(), PlanConstructor.student, ImmutableList.of("")); @@ -89,4 +91,64 @@ void pushDownFilterThroughWindowTest() { ) ); } + + @Test + public void testPushDownFilter() throws Exception { + String db = "test"; + createDatabase(db); + useDatabase(db); + createTable("CREATE TABLE lineorders (\n" + + "orderdate varchar(100) NOT NULL,\n" + + "orderid int NOT NULL,\n" + + "country_id int NOT NULL,\n" + + "vender_id int NOT NULL,\n" + + "ordernum int NOT NULL,\n" + + "ordemoney int NOT NULL\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(orderdate, orderid, country_id)\n" + + "COMMENT 'OLAP'\n" + + "PARTITION BY LIST(orderdate)\n" + + "(PARTITION p1992 VALUES IN (\"0-2020\"),\n" + + "PARTITION p1993 VALUES IN (\"0-2021\"),\n" + + "PARTITION p1994 VALUES IN (\"0-2022\"),\n" + + "PARTITION p1995 VALUES IN (\"0-2023\"),\n" + + "PARTITION p1996 VALUES IN (\"0-2024\"),\n" + + "PARTITION p1997 VALUES IN (\"0-2025\"))\n" + + "DISTRIBUTED BY HASH(orderid) BUCKETS 48\n" + + "PROPERTIES (\n" + + "\"replication_allocation\" = \"tag.location.default: 1\"\n" + + ")"); + + connectContext.getSessionVariable() + .setDisableNereidsRules( + RuleType.OLAP_SCAN_PARTITION_PRUNE.name() + "," + RuleType.PRUNE_EMPTY_PARTITION.name()); + + PlanChecker.from(connectContext) + .analyze("select * from ( \n" + + " select \n" + + " orderid,\n" + + " orderdate,\n" + + " country_id,\n" + + " ordernum,\n" + + " ordemoney,\n" + + " SUBSTR(lineorders.orderdate,3,4) AS dt,\n" + 
+ " ROW_NUMBER() OVER(PARTITION BY lineorders.orderid,lineorders.orderdate ORDER BY lineorders.country_id DESC) AS rn\n" + + " from lineorders\n" + + ") a \n" + + "where SUBSTR(a.dt, 1, 4) = SUBSTR(curdate(), 1, 4)") + .rewrite() + .matchesFromRoot( + logicalResultSink( + logicalProject( + logicalWindow( + logicalProject( + logicalFilter( + logicalOlapScan() + ) + ) + ) + ) + ) + ); + } } From 90976155c5c2fa3970fb0f7cbf709fc07a757b14 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Wed, 20 Aug 2025 12:41:37 +0800 Subject: [PATCH 2/4] fix --- .../nereids_hint_tpcds_p0/shape/query51.out | 31 +++++++++---------- .../shape/query51.out | 31 +++++++++---------- .../noStatsRfPrune/query51.out | 31 +++++++++---------- .../no_stats_shape/query51.out | 31 +++++++++---------- .../rf_prune/query51.out | 31 +++++++++---------- .../shape/query51.out | 31 +++++++++---------- .../tpcds_sf100/noStatsRfPrune/query51.out | 31 +++++++++---------- .../tpcds_sf100/no_stats_shape/query51.out | 31 +++++++++---------- .../tpcds_sf100/rf_prune/query51.out | 31 +++++++++---------- .../tpcds_sf100/shape/query51.out | 31 +++++++++---------- .../tpcds_sf1000/shape/query51.out | 31 +++++++++---------- 11 files changed, 165 insertions(+), 176 deletions(-) diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query51.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query51.out index a10c39885ad110..ad962e7d114470 100644 --- a/regression-test/data/nereids_hint_tpcds_p0/shape/query51.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = 
store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1223) and (date_dim.d_month_seq >= 1212)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query51.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query51.out index 6c22d2df3086e3..98b43bfdfc171f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject 
------------------------------------------filter((date_dim.d_month_seq <= 1223) and (date_dim.d_month_seq >= 1212)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query51.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > 
store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query51.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query51.out @@ -4,17 
+4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] 
-----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query51.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow 
+--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and 
(web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git 
a/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query51.out b/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query51.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject 
------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query51.out b/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query51.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) 
+------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query51.out b/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query51.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] 
----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject 
+----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query51.out b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query51.out index 470fabc0f31e81..cec684574edf4b 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query51.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] 
+----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1227) and (date_dim.d_month_seq >= 1216)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query51.out b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query51.out index 6c22d2df3086e3..98b43bfdfc171f 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query51.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() 
---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1223) and (date_dim.d_month_seq >= 1212)) --------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] From ec5f332cef4a6d0ae80965c46c965941dd29d1c5 Mon Sep 17 00:00:00 2001 From: 
924060929 Date: Wed, 20 Aug 2025 12:51:43 +0800 Subject: [PATCH 3/4] fix --- .../data/nereids_p0/cte/test_cte_filter_pushdown.out | 2 +- .../data/nereids_rules_p0/cte/test_cte_filter_pushdown.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out index 0bbae0dc25f3a1..edcbfcb6022c5f 100644 --- a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out +++ b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out @@ -30,7 +30,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) -- !cte_filter_pushdown_3 -- PhysicalResultSink --hashJoin[INNER_JOIN] hashCondition=((k3 = dd.k3)) otherCondition=() -----filter((tmp2.k3 = 0)) +----filter((tmp.k3 = 0)) ------PhysicalWindow --------PhysicalQuickSort[LOCAL_SORT] ----------filter((tmp.k1 = 1)) diff --git a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out index 7dd6492aa12499..db4dacb4b6ed51 100644 --- a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out +++ b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out @@ -30,7 +30,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) -- !cte_filter_pushdown_3 -- PhysicalResultSink --hashJoin[INNER_JOIN] hashCondition=((k3 = dd.k3)) otherCondition=() -----filter((tmp2.k3 = 0)) +----filter((tmp.k3 = 0)) ------PhysicalWindow --------PhysicalQuickSort[LOCAL_SORT] ----------filter((tmp.k1 = 1)) From bd96924090d201725f68410daf805e8ed7e57f2d Mon Sep 17 00:00:00 2001 From: 924060929 Date: Wed, 20 Aug 2025 15:31:49 +0800 Subject: [PATCH 4/4] fix --- .../shape/query51.out | 31 +++++++++---------- .../tpch/push_filter_window_eqset.out | 7 ++--- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/regression-test/data/nereids_tpcds_shape_sf10t_orc/shape/query51.out 
b/regression-test/data/nereids_tpcds_shape_sf10t_orc/shape/query51.out index 5635b5dbabf1b6..5d049b5a6bb2dc 100644 --- a/regression-test/data/nereids_tpcds_shape_sf10t_orc/shape/query51.out +++ b/regression-test/data/nereids_tpcds_shape_sf10t_orc/shape/query51.out @@ -4,17 +4,17 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------filter((web_cumulative > store_cumulative)) -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalProject -------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +--------PhysicalProject +----------filter((web_cumulative > store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] @@ -25,11 +25,10 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------filter((date_dim.d_month_seq <= 1225) and (date_dim.d_month_seq >= 1214)) 
--------------------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalProject -----------------------PhysicalWindow -------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------hashAgg[GLOBAL] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------hashAgg[LOCAL] diff --git a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out index 0a14dd5f07c181..bb38206fc48490 100644 --- a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out +++ b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out @@ -5,8 +5,7 @@ PhysicalResultSink ----PhysicalProject ------PhysicalWindow --------PhysicalQuickSort[LOCAL_SORT] -----------PhysicalDistribute[DistributionSpecHash] -------------PhysicalProject ---------------filter((region.r_regionkey = 1)) -----------------PhysicalOlapScan[region] +----------PhysicalProject +------------filter((region.r_regionkey = 1)) +--------------PhysicalOlapScan[region]